In [1]:
import pandas as pd
import numpy as np

In [2]:
# create a small dictionary with different data types

dft = pd.DataFrame(dict(A = np.random.rand(3),
                        B = 1,
                        C = 'foo',
                        D = pd.Timestamp('20010102'),
                        E = pd.Series([1.0]*3).astype('float32'),
                                F = False,
                                G = pd.Series([1]*3,dtype='int8')))

dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.59755,1,foo,2001-01-02,1.0,False,1
1,0.706858,1,foo,2001-01-02,1.0,False,1
2,0.385634,1,foo,2001-01-02,1.0,False,1


In [3]:
# There is a really easy way to see what kind of dtypes 
# are in each column. 

dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [15]:
# If a pandas object contains data multiple dtypes IN A 
# SINGLE COLUMN, the dtype of the column will be chosen 
# to accommodate all of the data types (object is the 
# most general).
# these ints are coerced to floats

pd.Series([1, 2, 3, 4, 5, 6.])

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [19]:
# string data forces an ``object`` dtype
a =pd.Series([1, 2, 3, 6., 'foo'])
print(a)
#a.dtype

0      1
1      2
2      3
3      6
4    foo
dtype: object


In [6]:
# The method get_dtype_counts() will return the number 
# of columns of each type in a DataFrame:

dft.get_dtype_counts()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [31]:
# create a small data frame. 

dfnan = pd.DataFrame(np.random.randn(5, 4), columns=['a', 'b', 'c', 'd'])
dfnan

Unnamed: 0,a,b,c,d
0,1.155942,1.727064,1.307215,1.010253
1,0.709983,1.627861,-0.870431,0.552618
2,0.158445,-2.13574,0.395378,-0.781799
3,-0.001988,-0.310566,1.280093,0.056724
4,0.81461,0.292945,-0.066354,0.336272


In [8]:
# Use df.apply to find the square root of all the values. 
# NaN means not a number

df.apply(np.sqrt)

Unnamed: 0,a,b,c,d
0,1.132104,,0.547846,
1,0.456564,1.147634,0.918713,0.287928
2,,0.877604,,
3,,,0.87879,1.062961
4,,0.949133,,0.584659


In [None]:
# find the mean of all of the columns

df.apply(np.mean, axis=0)

In [32]:
dfnan["a"] = dfnan.fillna(np.mean(df["a"]))
dfnan

Unnamed: 0,a,b,c,d
0,1.155942,1.727064,1.307215,1.010253
1,0.709983,1.627861,-0.870431,0.552618
2,0.158445,-2.13574,0.395378,-0.781799
3,-0.001988,-0.310566,1.280093,0.056724
4,0.81461,0.292945,-0.066354,0.336272


In [26]:
# find the mean of all of the rows

df.apply(np.mean, axis=1)



0   -0.815018
1    0.337570
2   -0.129506
3    0.965428
4    0.430636
dtype: float64

In [11]:
# Let's create a random array with 50 numbers, ranging 
# from 0 to 7.

data = np.random.randint(0, 7, size = 50)
data

array([1, 5, 1, 0, 5, 5, 6, 4, 2, 4, 1, 4, 0, 4, 2, 5, 6, 1, 6, 1, 5, 5, 3,
       6, 1, 5, 6, 3, 2, 6, 6, 0, 3, 1, 2, 4, 4, 3, 3, 4, 1, 0, 3, 4, 2, 1,
       3, 5, 4, 0])

In [33]:
# convert the array into a series

s = pd.Series(data)
#print(s)

0     1
1     5
2     1
3     0
4     5
5     5
6     6
7     4
8     2
9     4
10    1
11    4
12    0
13    4
14    2
15    5
16    6
17    1
18    6
19    1
20    5
21    5
22    3
23    6
24    1
25    5
26    6
27    3
28    2
29    6
30    6
31    0
32    3
33    1
34    2
35    4
36    4
37    3
38    3
39    4
40    1
41    0
42    3
43    4
44    2
45    1
46    3
47    5
48    4
49    0
dtype: int64


In [36]:
# How many of each number is there in the series? Enter 
# value_counts()

pd.value_counts(s,sort= True,ascending=False)

4    9
1    9
5    8
6    7
3    7
2    5
0    5
dtype: int64

In [None]:
np.