In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a small DataFrame from a dict whose values each carry a
# different data type; scalar values are repeated on every row.

df = pd.DataFrame({
    'A': np.random.rand(3),
    'B': 1,
    'C': 'foo',
    'D': pd.Timestamp('20010102'),
    'E': pd.Series([1.0, 1.0, 1.0]).astype('float32'),
    'F': False,
    'G': pd.Series([1, 1, 1], dtype='int8'),
})

df

Unnamed: 0,A,B,C,D,E,F,G
0,0.764058,1,foo,2001-01-02,1.0,False,1
1,0.941741,1,foo,2001-01-02,1.0,False,1
2,0.468576,1,foo,2001-01-02,1.0,False,1


In [3]:
# The dtypes attribute is an easy way to see the data type of
# every column: it returns one dtype per column name.

df.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [4]:
# If a pandas object contains data of multiple dtypes IN A
# SINGLE COLUMN, the dtype of the column will be chosen
# to accommodate all of the data types (object is the
# most general).
# Here the single float (6.) causes the ints to be coerced to
# floats, so the resulting Series has dtype float64.

pd.Series([1, 2, 3, 4, 5, 6.])

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [5]:
# Mixing string data in with the numbers forces the ``object``
# dtype, the most general dtype, for the whole Series.

pd.Series([1, 2, 3, 6., 'foo'])

0      1
1      2
2      3
3      6
4    foo
dtype: object

#### Find the datatype for each element in the previous Series

In [6]:
# code here

In [7]:
# Count how many columns of each dtype the DataFrame has.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and
# removed in 1.0, so count the entries of df.dtypes instead.
# Casting the dtypes to str and sorting the index keeps the
# alphabetical-by-name ordering that get_dtype_counts() produced.

df.dtypes.astype(str).value_counts().sort_index()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [8]:
# select_dtypes() filters columns by dtype. Excluding 'number'
# drops the numeric columns and keeps the rest (here C, D and F).
df.select_dtypes(exclude=['number'])
# Alternative to try: keep only the boolean column(s).
# df.select_dtypes(include=['bool'])

Unnamed: 0,C,D,F
0,foo,2001-01-02,False
1,foo,2001-01-02,False
2,foo,2001-01-02,False


In [9]:
# Build a 5-row, 4-column DataFrame of random normal draws,
# with the columns labelled 'a' through 'd'.

df = pd.DataFrame(data=np.random.randn(5, 4), columns=list('abcd'))
df

Unnamed: 0,a,b,c,d
0,-0.385435,1.046002,0.823809,-0.781188
1,-1.628201,-0.126106,-0.361818,1.362368
2,1.337482,-0.929826,-0.493583,0.125314
3,1.355557,0.258271,0.850913,-1.425077
4,-0.815625,0.429894,-0.087784,0.763572


In [10]:
# Use df.apply to find the square root of all the values.
# Negative inputs have no real square root, so those cells come
# back as NaN ("not a number").

df.apply(np.sqrt)

Unnamed: 0,a,b,c,d
0,,1.022742,0.907639,
1,,,,1.167205
2,1.156496,,,0.353997
3,1.164284,0.508204,0.922449,
4,,0.655663,,0.873826


In [11]:
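# Element-wise example to try: wrap every value in parentheses.
# (DataFrame.applymap was renamed DataFrame.map in pandas 2.1.)
# df.applymap(lambda x: "({})".format(x))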

In [12]:
# Find the mean of each column: with axis=0, np.mean is applied
# to every column, giving one value per column label (a, b, c, d).

df.apply(np.mean, axis=0)

a   -0.027244
b    0.135647
c    0.146307
d    0.008998
dtype: float64

In [13]:
# Find the mean of each row: with axis=1, np.mean is applied
# to every row, giving one value per row label (0 through 4).

df.apply(np.mean, axis=1)

0    0.175797
1   -0.188439
2    0.009847
3    0.259916
4    0.072515
dtype: float64

In [14]:
# Draw 50 random integers from 0 through 6. The upper bound passed
# to randint (7) is exclusive, so 7 itself never appears.

data = np.random.randint(low=0, high=7, size=50)
data

array([4, 6, 0, 5, 0, 6, 0, 2, 6, 5, 4, 0, 5, 0, 4, 0, 2, 6, 5, 6, 1, 3, 2,
       5, 3, 3, 6, 0, 1, 3, 1, 5, 4, 6, 6, 3, 2, 3, 2, 4, 6, 0, 0, 3, 0, 5,
       3, 1, 4, 4])

In [17]:
# Convert the NumPy array into a pandas Series so that Series
# methods can be used on it.

s = pd.Series(data)

In [18]:
# How many times does each number occur in the Series?
# Series.value_counts() returns the counts ordered from the most
# to the least frequent value. (The top-level pd.value_counts()
# function is deprecated since pandas 2.1; call the method instead.)

s.value_counts()

0    10
6     9
3     8
5     7
4     7
2     5
1     4
dtype: int64