In [3]:
import pandas as pd
import numpy as np

In [4]:
# create a small dictionary with different data types

dft = pd.DataFrame(dict(A = np.random.rand(3),
                        B = 1,
                        C = 'foo',
                        D = pd.Timestamp('20010102'),
                        E = pd.Series([1.0]*3).astype('float32'),
                                F = False,
                                G = pd.Series([1]*3,dtype='int8')))

dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.747007,1,foo,2001-01-02,1.0,False,1
1,0.226602,1,foo,2001-01-02,1.0,False,1
2,0.456349,1,foo,2001-01-02,1.0,False,1


In [5]:
# There is a really easy way to see what kind of dtypes 
# are in each column. 

dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [6]:
# If a pandas object contains data multiple dtypes IN A 
# SINGLE COLUMN, the dtype of the column will be chosen 
# to accommodate all of the data types (object is the 
# most general).
# these ints are coerced to floats

pd.Series([1, 2, 3, 4, 5, 6.])

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [7]:
# string data forces an ``object`` dtype

test = pd.Series([1, 2, 3, 6., 'foo'])
type(test[4])

str

In [8]:
# The method get_dtype_counts() will return the number 
# of columns of each type in a DataFrame:

dft.get_dtype_counts()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [9]:
# create a small data frame. 

df = pd.DataFrame(np.random.randn(5, 4), columns=['a', 'b', 'c', 'd'])
df

Unnamed: 0,a,b,c,d
0,1.47195,0.243138,-1.30621,0.510817
1,0.575049,0.412122,0.30073,-0.519388
2,-0.290422,-0.128465,-0.220626,1.130433
3,0.134035,1.235159,1.018661,1.737657
4,0.067413,-0.443281,-0.470369,0.766671


In [15]:
# Use df.apply to find the square root of all the values. 
# NaN means not a number

dfnan = df.apply(np.sqrt)
dfnan

Unnamed: 0,a,b,c,d
0,1.213239,0.49309,,0.714714
1,0.75832,0.641968,0.548389,
2,,,,1.063218
3,0.366108,1.111377,1.009287,1.318202
4,0.259639,,,0.875597


In [16]:
# find the mean of all of the columns

df.apply(np.mean, axis=0)


a    0.391605
b    0.263735
c   -0.135563
d    0.725238
dtype: float64

In [14]:
dfnan['a'] = dfnan.fillna(np.mean(df['a']))# fill in the Nan values in the "a" column
dfnan

Unnamed: 0,a,b,c,d
0,1.213239,0.49309,,0.714714
1,0.75832,0.641968,0.548389,
2,0.391605,,,1.063218
3,0.366108,1.111377,1.009287,1.318202
4,0.259639,,,0.875597


In [17]:
# find the mean of all of the rows

df.apply(np.mean, axis=1) #axis=1 is for rows
df

Unnamed: 0,a,b,c,d
0,1.47195,0.243138,-1.30621,0.510817
1,0.575049,0.412122,0.30073,-0.519388
2,-0.290422,-0.128465,-0.220626,1.130433
3,0.134035,1.235159,1.018661,1.737657
4,0.067413,-0.443281,-0.470369,0.766671


In [19]:
# Let's create a random array with 50 numbers, ranging 
# from 0 to 7.

data = np.random.randint(0, 7, size = 50)
data

array([3, 5, 4, 2, 4, 2, 6, 4, 0, 2, 1, 6, 0, 1, 3, 0, 1, 6, 3, 3, 5, 5, 6,
       2, 1, 3, 5, 1, 2, 5, 5, 2, 1, 2, 3, 1, 6, 0, 4, 0, 3, 2, 5, 3, 3, 5,
       4, 3, 0, 2])

In [22]:
# convert the array into a series

s = pd.Series(data)


In [23]:
# How many of each number is there in the series? Enter 
# value_counts()

pd.value_counts(s)

3    10
2     9
5     8
1     7
0     6
6     5
4     5
dtype: int64