In [1]:
import numpy as np
import pandas as pd

In [2]:
vals1 = np.array([1, None, 3, 4])
vals1

array([1, None, 3, 4], dtype=object)

In [4]:
vals1.dtype

dtype('O')

In [5]:
V = np.arange(12).reshape(3,4)
V

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [6]:
V.dtype

dtype('int32')

In [7]:
V[0]

array([0, 1, 2, 3])

In [8]:
V[0][0]

0

In [9]:
np.arange(1E6)

array([0.00000e+00, 1.00000e+00, 2.00000e+00, ..., 9.99997e+05,
       9.99998e+05, 9.99999e+05])

In [10]:
for dtype in ['object', 'int', 'float', 'complex']:
    print("dtype=", dtype)
    %timeit np.arange(1E6, dtype=dtype).sum()
    print()

dtype= object
117 ms ± 3.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

dtype= int
3.57 ms ± 63.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

dtype= float
6.61 ms ± 21.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

dtype= complex
10.7 ms ± 61.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)



In [11]:
vals1.sum()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [12]:
vals1.mean()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [13]:
vals1+10

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

# Difference b/w None and np.nan

In [16]:
vals2 = np.array([1,3,4,np.nan])
vals2.dtype

dtype('float64')

In [17]:
vals2 = np.array([1,3,4,None])
vals2.dtype

dtype('O')

In [18]:
1+np.nan

nan

In [19]:
0*np.nan

nan

In [24]:
vals2 = np.array([1,np.nan,3,4,np.nan])
vals2

array([ 1., nan,  3.,  4., nan])

In [25]:
vals2.sum(),vals2.min(),vals2.max(),vals2.mean()

(nan, nan, nan, nan)

In [26]:
np.nansum(vals2),np.nanmin(vals2),np.nanmax(vals2),np.nanmean(vals2)

(8.0, 1.0, 4.0, 2.6666666666666665)

In [27]:
# In numpy when there is 

In [28]:
x = pd.Series(range(4), dtype=int)
x

0    0
1    1
2    2
3    3
dtype: int32

# Here we can perform indexing in Series but not in array

In [29]:
x[3]=None
x

0    0.0
1    1.0
2    2.0
3    NaN
dtype: float64

# Detecting Null values by using isnull & notnull

In [30]:
data = pd.Series([1, np.nan,10.5, None])
data

0     1.0
1     NaN
2    10.5
3     NaN
dtype: float64

In [31]:
data.isnull()

0    False
1     True
2    False
3     True
dtype: bool

In [33]:
Df = pd.DataFrame(data,columns=['Data'])
Df

Unnamed: 0,Data
0,1.0
1,
2,10.5
3,


In [34]:
Df.isnull()

Unnamed: 0,Data
0,False
1,True
2,False
3,True


In [35]:
Df['Age']=np.array([24,23,45,56])
Df

Unnamed: 0,Data,Age
0,1.0,24
1,,23
2,10.5,45
3,,56


In [36]:
Df.dtypes

Data    float64
Age       int32
dtype: object

In [37]:
data.notnull()

0     True
1    False
2     True
3    False
dtype: bool

In [38]:
Df.notnull()

Unnamed: 0,Data,Age
0,True,True
1,False,True
2,True,True
3,False,True


In [39]:
data[data.notnull()]

0     1.0
2    10.5
dtype: float64

In [40]:
Df[Df.notnull()]

Unnamed: 0,Data,Age
0,1.0,24
1,,23
2,10.5,45
3,,56


# The isnull() & notnull() methods produce similiar Boolean results for Dataframe

In [41]:
data

0     1.0
1     NaN
2    10.5
3     NaN
dtype: float64

In [42]:
data.dropna()

0     1.0
2    10.5
dtype: float64

In [43]:
data.fillna(value=10.5)

0     1.0
1    10.5
2    10.5
3    10.5
dtype: float64

In [44]:
mean = np.nanmean(data)
print(mean)
data.fillna(value=mean)

5.75


0     1.00
1     5.75
2    10.50
3     5.75
dtype: float64

In [45]:
data

0     1.0
1     NaN
2    10.5
3     NaN
dtype: float64

In [46]:
df = pd.DataFrame([[1,np.nan,2,None],
                  [2,3,5,10],
                  [np.nan,4,9,155]],columns=['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
0,1.0,,2,
1,2.0,3.0,5,10.0
2,,4.0,9,155.0


In [47]:
df.dropna(axis='rows')

Unnamed: 0,A,B,C,D
1,2.0,3.0,5,10.0


In [48]:
df['D']=np.nan
df

Unnamed: 0,A,B,C,D
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,9,


In [49]:
df.dropna(axis='columns')

Unnamed: 0,C
0,2
1,5
2,9


In [50]:
df.dropna(axis='columns', how='all')

Unnamed: 0,A,B,C
0,1.0,,2
1,2.0,3.0,5
2,,4.0,9


In [51]:
df.dropna(axis=1,how='any')

Unnamed: 0,C
0,2
1,5
2,9


In [52]:
df.dropna(axis='rows', thresh=2)

Unnamed: 0,A,B,C,D
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,9,


In [53]:
df.dropna(axis='columns', thresh=3)

Unnamed: 0,C
0,2
1,5
2,9


In [54]:
df.dropna(axis='rows', thresh=3)

Unnamed: 0,A,B,C,D
1,2.0,3.0,5,


In [55]:
df.dropna(axis='columns', thresh=2)

Unnamed: 0,A,B,C
0,1.0,,2
1,2.0,3.0,5
2,,4.0,9


In [56]:
df.dropna(axis='columns', thresh=4)

0
1
2


In [59]:
Df = pd.DataFrame({"Int":[10,20,30,40,np.nan,50,60],
                  'Float':[20.5,10.5,5.5,2.5,6.5,np.nan,27.5],
                  'complex':[np.nan,2j+3,np.nan,23j+2,5j+2,np.nan,4j+3]
                  })
Df

Unnamed: 0,Int,Float,complex
0,10.0,20.5,
1,20.0,10.5,3.000000+2.000000j
2,30.0,5.5,
3,40.0,2.5,2.000000+23.000000j
4,,6.5,2.000000+5.000000j
5,50.0,,
6,60.0,27.5,3.000000+4.000000j


In [60]:
Df.isnull()

Unnamed: 0,Int,Float,complex
0,False,False,True
1,False,False,False
2,False,False,True
3,False,False,False
4,True,False,False
5,False,True,True
6,False,False,False


In [61]:
Df.isna()

Unnamed: 0,Int,Float,complex
0,False,False,True
1,False,False,False
2,False,False,True
3,False,False,False
4,True,False,False
5,False,True,True
6,False,False,False


In [62]:
Df.notnull()

Unnamed: 0,Int,Float,complex
0,True,True,False
1,True,True,True
2,True,True,False
3,True,True,True
4,False,True,True
5,True,False,False
6,True,True,True


In [63]:
Df.notna()

Unnamed: 0,Int,Float,complex
0,True,True,False
1,True,True,True
2,True,True,False
3,True,True,True
4,False,True,True
5,True,False,False
6,True,True,True


In [64]:
Fill = np.nansum(Df).astype(float)
Fill

  """Entry point for launching an IPython kernel.


293.0

In [65]:
Fill_Missing_Value =Df.fillna(Fill,axis=1)
Fill_Missing_Value

Unnamed: 0,Int,Float,complex
0,10.0,20.5,293.000000+0.000000j
1,20.0,10.5,3.000000+2.000000j
2,30.0,5.5,293.000000+0.000000j
3,40.0,2.5,2.000000+23.000000j
4,293.0,6.5,2.000000+5.000000j
5,50.0,293.0,293.000000+0.000000j
6,60.0,27.5,3.000000+4.000000j


# Filling null values

In [66]:
data = pd.Series([1, np.nan, 2, None,3], index=list('abcde'))
data

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64

In [67]:
data.fillna(0)

a    1.0
b    0.0
c    2.0
d    0.0
e    3.0
dtype: float64

In [68]:
## forward-fill
data.fillna(method='ffill')

a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64

In [69]:
## back-fill
data.fillna(method='bfill')

a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64

## While doing forward fill(ffill) and backward fill(bfill), axis=0 is acolumn and axis=1 is row

In [70]:
Df

Unnamed: 0,Int,Float,complex
0,10.0,20.5,
1,20.0,10.5,3.000000+2.000000j
2,30.0,5.5,
3,40.0,2.5,2.000000+23.000000j
4,,6.5,2.000000+5.000000j
5,50.0,,
6,60.0,27.5,3.000000+4.000000j


In [71]:
df.fillna(method='ffill', axis=1)

Unnamed: 0,A,B,C,D
0,1.0,1.0,2.0,2.0
1,2.0,3.0,5.0,5.0
2,,4.0,9.0,9.0


In [72]:
df.fillna(method='bfill', axis=1)

Unnamed: 0,A,B,C,D
0,1.0,2.0,2.0,
1,2.0,3.0,5.0,
2,4.0,4.0,9.0,


In [86]:
Data = pd.read_csv('california_cities.csv')
Data

Unnamed: 0.1,Unnamed: 0,city,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
0,0,Adelanto,34.576111,-117.432778,875.0,2871.0,31765,56.027,56.009,0.018,145.107,145.062,0.046,0.03
1,1,AgouraHills,34.153333,-118.761667,281.0,922.0,20330,7.822,7.793,0.029,20.260,20.184,0.076,0.37
2,2,Alameda,37.756111,-122.274444,,33.0,75467,22.960,10.611,12.349,59.465,27.482,31.983,53.79
3,3,Albany,37.886944,-122.297778,,43.0,18969,5.465,1.788,3.677,14.155,4.632,9.524,67.28
4,4,Alhambra,34.081944,-118.135000,150.0,492.0,83089,7.632,7.631,0.001,19.766,19.763,0.003,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
477,477,Yountville,38.403056,-122.362222,30.0,98.0,2933,1.531,1.531,0.000,3.966,3.966,0.000,0.00
478,478,Yreka,41.726667,-122.637500,787.0,2582.0,7765,10.053,9.980,0.073,26.036,25.847,0.188,0.72
479,479,YubaCity,39.134722,-121.626111,18.0,59.0,64925,14.656,14.578,0.078,37.959,37.758,0.201,0.53
480,480,Yucaipa,34.030278,-117.048611,798.0,2618.0,51367,27.893,27.888,0.005,72.244,72.231,0.013,0.02


In [87]:
Data.head()

Unnamed: 0.1,Unnamed: 0,city,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
0,0,Adelanto,34.576111,-117.432778,875.0,2871.0,31765,56.027,56.009,0.018,145.107,145.062,0.046,0.03
1,1,AgouraHills,34.153333,-118.761667,281.0,922.0,20330,7.822,7.793,0.029,20.26,20.184,0.076,0.37
2,2,Alameda,37.756111,-122.274444,,33.0,75467,22.96,10.611,12.349,59.465,27.482,31.983,53.79
3,3,Albany,37.886944,-122.297778,,43.0,18969,5.465,1.788,3.677,14.155,4.632,9.524,67.28
4,4,Alhambra,34.081944,-118.135,150.0,492.0,83089,7.632,7.631,0.001,19.766,19.763,0.003,0.01


In [88]:
Data.tail()

Unnamed: 0.1,Unnamed: 0,city,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
477,477,Yountville,38.403056,-122.362222,30.0,98.0,2933,1.531,1.531,0.0,3.966,3.966,0.0,0.0
478,478,Yreka,41.726667,-122.6375,787.0,2582.0,7765,10.053,9.98,0.073,26.036,25.847,0.188,0.72
479,479,YubaCity,39.134722,-121.626111,18.0,59.0,64925,14.656,14.578,0.078,37.959,37.758,0.201,0.53
480,480,Yucaipa,34.030278,-117.048611,798.0,2618.0,51367,27.893,27.888,0.005,72.244,72.231,0.013,0.02
481,481,YuccaValley,34.133333,-116.416667,1027.0,3369.0,20700,40.015,40.015,0.0,103.639,103.639,0.0,0.0


In [89]:
Data.describe()

Unnamed: 0.1,Unnamed: 0,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
count,482.0,482.0,482.0,434.0,470.0,482.0,480.0,482.0,481.0,477.0,478.0,478.0,477.0
mean,240.5,36.095088,-119.837714,180.416705,567.168723,64894.93,18.442183,16.840981,1.58963,46.753283,43.547021,3.125178,5.301801
std,139.285678,2.298006,2.147676,309.531825,1001.265722,203204.1,36.36323,32.694647,9.490386,90.972269,84.933022,11.562269,14.656428
min,0.0,32.578333,-124.263333,-34.0,-180.0,1.0,0.314,0.314,0.0,0.813,0.813,0.0,0.0
25%,120.25,33.977292,-121.954931,22.0,69.0,10902.0,3.70825,3.5065,0.0,9.551,9.025,0.00025,0.0
50%,240.5,36.204722,-119.708611,67.0,200.0,29057.5,9.093,8.362,0.029,23.551,21.533,0.0785,0.34
75%,360.75,37.886667,-117.977292,173.5,515.0,66466.5,19.87825,19.09,0.244,51.772,49.322,0.62325,1.87
max,481.0,41.965,-114.596389,2402.0,7880.0,3884307.0,503.0,469.0,185.02,1302.0,1214.0,122.27,84.58


In [90]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 14 columns):
Unnamed: 0            482 non-null int64
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           434 non-null float64
elevation_ft          470 non-null float64
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        477 non-null float64
area_land_km2         478 non-null float64
area_water_km2        478 non-null float64
area_water_percent    477 non-null float64
dtypes: float64(11), int64(2), object(1)
memory usage: 52.8+ KB


In [91]:
Data = Data.drop(['Unnamed: 0'],axis=1)

In [92]:
Data.head()

Unnamed: 0,city,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
0,Adelanto,34.576111,-117.432778,875.0,2871.0,31765,56.027,56.009,0.018,145.107,145.062,0.046,0.03
1,AgouraHills,34.153333,-118.761667,281.0,922.0,20330,7.822,7.793,0.029,20.26,20.184,0.076,0.37
2,Alameda,37.756111,-122.274444,,33.0,75467,22.96,10.611,12.349,59.465,27.482,31.983,53.79
3,Albany,37.886944,-122.297778,,43.0,18969,5.465,1.788,3.677,14.155,4.632,9.524,67.28
4,Alhambra,34.081944,-118.135,150.0,492.0,83089,7.632,7.631,0.001,19.766,19.763,0.003,0.01


In [93]:
Data.isnull()

Unnamed: 0,city,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
0,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,True,False,False,False,False,False,False,False,False,False
3,False,False,False,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
477,False,False,False,False,False,False,False,False,False,False,False,False,False
478,False,False,False,False,False,False,False,False,False,False,False,False,False
479,False,False,False,False,False,False,False,False,False,False,False,False,False
480,False,False,False,False,False,False,False,False,False,False,False,False,False


In [94]:
Data.columns

Index(['city', 'latd', 'longd', 'elevation_m', 'elevation_ft',
       'population_total', 'area_total_sq_mi', 'area_land_sq_mi',
       'area_water_sq_mi', 'area_total_km2', 'area_land_km2', 'area_water_km2',
       'area_water_percent'],
      dtype='object')

In [95]:
fill = np.nanmean(Data['elevation_m'])
fill

180.41670506912445

In [96]:
Data

Unnamed: 0,city,latd,longd,elevation_m,elevation_ft,population_total,area_total_sq_mi,area_land_sq_mi,area_water_sq_mi,area_total_km2,area_land_km2,area_water_km2,area_water_percent
0,Adelanto,34.576111,-117.432778,875.0,2871.0,31765,56.027,56.009,0.018,145.107,145.062,0.046,0.03
1,AgouraHills,34.153333,-118.761667,281.0,922.0,20330,7.822,7.793,0.029,20.260,20.184,0.076,0.37
2,Alameda,37.756111,-122.274444,,33.0,75467,22.960,10.611,12.349,59.465,27.482,31.983,53.79
3,Albany,37.886944,-122.297778,,43.0,18969,5.465,1.788,3.677,14.155,4.632,9.524,67.28
4,Alhambra,34.081944,-118.135000,150.0,492.0,83089,7.632,7.631,0.001,19.766,19.763,0.003,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
477,Yountville,38.403056,-122.362222,30.0,98.0,2933,1.531,1.531,0.000,3.966,3.966,0.000,0.00
478,Yreka,41.726667,-122.637500,787.0,2582.0,7765,10.053,9.980,0.073,26.036,25.847,0.188,0.72
479,YubaCity,39.134722,-121.626111,18.0,59.0,64925,14.656,14.578,0.078,37.959,37.758,0.201,0.53
480,Yucaipa,34.030278,-117.048611,798.0,2618.0,51367,27.893,27.888,0.005,72.244,72.231,0.013,0.02


In [97]:
fill_elevation_m=np.nanmean(Data['elevation_m'])
fill_elevation_m

180.41670506912445

In [98]:
Data['elevation_m']=Data.fillna(fill_elevation_m)

In [99]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          470 non-null float64
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        477 non-null float64
area_land_km2         478 non-null float64
area_water_km2        478 non-null float64
area_water_percent    477 non-null float64
dtypes: float64(10), int64(1), object(2)
memory usage: 49.1+ KB


In [100]:
fill_elevation_ft=np.nanmean(Data['elevation_ft'])
fill_elevation_ft

567.1687234042553

In [102]:
Data['elevation_ft']=Data.fillna(fill_elevation_ft)

In [103]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        477 non-null float64
area_land_km2         478 non-null float64
area_water_km2        478 non-null float64
area_water_percent    477 non-null float64
dtypes: float64(9), int64(1), object(3)
memory usage: 49.1+ KB


In [104]:
fill_area_total_km2=np.nanmean(Data['area_total_km2'])
fill_area_total_km2

46.753283018867926

In [106]:
Data['area_total_km2']=Data.fillna('area_total_km2')

In [107]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        482 non-null object
area_land_km2         478 non-null float64
area_water_km2        478 non-null float64
area_water_percent    477 non-null float64
dtypes: float64(8), int64(1), object(4)
memory usage: 49.1+ KB


In [108]:
fill_area_land_km2=np.nanmean(Data['area_land_km2'])
fill_area_land_km2

43.54702092050209

In [109]:
Data['area_land_km2']=Data.fillna('area_land_km2')

In [110]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        482 non-null object
area_land_km2         482 non-null object
area_water_km2        478 non-null float64
area_water_percent    477 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 49.1+ KB


In [111]:
fill_area_water_km2=np.nanmean(Data['area_water_km2'])
fill_area_water_km2

3.1251782426778236

In [112]:
Data['area_water_km2']=Data.fillna('area_water_km2')
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        482 non-null object
area_land_km2         482 non-null object
area_water_km2        482 non-null object
area_water_percent    477 non-null float64
dtypes: float64(6), int64(1), object(6)
memory usage: 49.1+ KB


In [113]:
fill_area_water_percent=np.nanmean(Data['area_water_percent'])
fill_area_water_percent

5.301800838574423

In [114]:
Data['area_water_percent']=Data.fillna('area_water_percent')
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      480 non-null float64
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        482 non-null object
area_land_km2         482 non-null object
area_water_km2        482 non-null object
area_water_percent    482 non-null object
dtypes: float64(5), int64(1), object(7)
memory usage: 49.1+ KB


In [116]:
fill_area_total_sq_mi=np.nanmean(Data['area_total_sq_mi'])
fill_area_total_sq_mi

18.442183333333332

In [117]:
Data['area_total_sq_mi']=Data.fillna('area_total_sq_mi')
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      482 non-null object
area_land_sq_mi       482 non-null float64
area_water_sq_mi      481 non-null float64
area_total_km2        482 non-null object
area_land_km2         482 non-null object
area_water_km2        482 non-null object
area_water_percent    482 non-null object
dtypes: float64(4), int64(1), object(8)
memory usage: 49.1+ KB


In [118]:
fill_area_water_sq_mi=np.nanmean(Data['area_water_sq_mi'])
fill_area_water_sq_mi

1.5896299376299377

In [119]:
Data['area_water_sq_mi']=Data.fillna('area_water_sq_mi')
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 13 columns):
city                  482 non-null object
latd                  482 non-null float64
longd                 482 non-null float64
elevation_m           482 non-null object
elevation_ft          482 non-null object
population_total      482 non-null int64
area_total_sq_mi      482 non-null object
area_land_sq_mi       482 non-null float64
area_water_sq_mi      482 non-null object
area_total_km2        482 non-null object
area_land_km2         482 non-null object
area_water_km2        482 non-null object
area_water_percent    482 non-null object
dtypes: float64(3), int64(1), object(9)
memory usage: 49.1+ KB
