### Import Libraries

In [2]:
import pandas as pd
import numpy as np

### Read CSV

In [3]:
train = pd.read_csv('./assets/train.csv')
weather = pd.read_csv('./assets/weather.csv')
spray = pd.read_csv('./assets/spray.csv')

##### Check Data Types and Columns name

**from weather descr document**
- `M`: Missing Data, or data that are not normally reported for the station
- `-`: values not available for sunrise/sunset.
    - because each date has two reported data from station 1 and 2. Therefore, station 2 column of the date has no sunrise/sunset data.

In [4]:
print("Train set data types:\n")
print(train.dtypes)
print("-----")
print("Weather set data types:\n")
print(weather.dtypes)
print("-----")
print("Spray set data types:\n")
print(spray.dtypes)


Train set data types:

Date                       object
Address                    object
Species                    object
Block                       int64
Street                     object
Trap                       object
AddressNumberAndStreet     object
Latitude                  float64
Longitude                 float64
AddressAccuracy             int64
NumMosquitos                int64
WnvPresent                  int64
dtype: object
-----
Weather set data types:

Station          int64
Date            object
Tmax             int64
Tmin             int64
Tavg            object
Depart          object
DewPoint         int64
WetBulb         object
Heat            object
Cool            object
Sunrise         object
Sunset          object
CodeSum         object
Depth           object
Water1          object
SnowFall        object
PrecipTotal     object
StnPressure     object
SeaLevel        object
ResultSpeed    float64
ResultDir        int64
AvgSpeed        object
dtype: object
----

In [5]:
train.WnvPresent.value_counts()

0    9955
1     551
Name: WnvPresent, dtype: int64

In [6]:
# weird. How 7:44:32 pm has 541 values? are they using auto-spray function?

spray.Time.value_counts()

7:44:32 PM     541
8:59:06 PM       5
9:35:47 PM       5
8:58:26 PM       5
9:05:56 PM       5
8:57:46 PM       5
9:38:27 PM       5
8:57:56 PM       5
8:55:36 PM       5
9:40:27 PM       5
8:55:46 PM       5
9:31:27 PM       5
9:37:27 PM       5
8:55:26 PM       5
8:58:56 PM       5
9:30:17 PM       4
8:42:55 PM       4
8:29:01 PM       4
9:27:15 PM       4
8:59:05 PM       4
8:37:36 PM       4
9:28:47 PM       4
9:59:05 PM       4
8:22:11 PM       4
9:01:36 PM       4
8:59:15 PM       4
9:00:35 PM       4
9:10:56 PM       4
9:42:45 PM       4
9:07:26 PM       4
              ... 
8:49:12 PM       1
8:57:33 PM       1
12:16:49 AM      1
7:41:56 PM       1
10:33:45 PM      1
7:39:51 PM       1
10:09:36 PM      1
7:36:16 PM       1
10:25:26 PM      1
9:10:42 PM       1
8:18:21 PM       1
9:44:38 PM       1
8:40:55 PM       1
10:03:11 PM      1
10:47:15 PM      1
8:07:33 PM       1
10:10:37 PM      1
9:27:07 PM       1
8:59:19 PM       1
11:01:40 PM      1
7:46:05 PM       1
9:02:02 PM  

In [7]:
# it seems the duplicated data.
spray[spray.Time == "7:44:32 PM"]

Unnamed: 0,Date,Time,Latitude,Longitude
489,2011-09-07,7:44:32 PM,41.98646,-87.794225
490,2011-09-07,7:44:32 PM,41.98646,-87.794225
491,2011-09-07,7:44:32 PM,41.98646,-87.794225
492,2011-09-07,7:44:32 PM,41.98646,-87.794225
493,2011-09-07,7:44:32 PM,41.98646,-87.794225
494,2011-09-07,7:44:32 PM,41.98646,-87.794225
495,2011-09-07,7:44:32 PM,41.98646,-87.794225
496,2011-09-07,7:44:32 PM,41.98646,-87.794225
497,2011-09-07,7:44:32 PM,41.98646,-87.794225
498,2011-09-07,7:44:32 PM,41.98646,-87.794225


In [8]:
spray[spray.Longitude == -87.794225]['Time'].value_counts()

7:44:32 PM    541
Name: Time, dtype: int64

##### Check numerical columns data, find abnormalty or outliers

In [9]:
print(train.isnull().sum().sum())
print(weather.isnull().sum().sum())
print(spray.isnull().sum().sum())


0
0
584


In [10]:
spray[spray['Time'].isnull()]

Unnamed: 0,Date,Time,Latitude,Longitude
1030,2011-09-07,,41.987092,-87.794286
1031,2011-09-07,,41.987620,-87.794382
1032,2011-09-07,,41.988004,-87.794574
1033,2011-09-07,,41.988292,-87.795486
1034,2011-09-07,,41.988100,-87.796014
1035,2011-09-07,,41.986372,-87.794862
1036,2011-09-07,,41.986228,-87.795582
1037,2011-09-07,,41.984836,-87.793998
1038,2011-09-07,,41.984836,-87.794670
1039,2011-09-07,,41.984884,-87.795198


In [11]:
train.describe()

Unnamed: 0,Block,Latitude,Longitude,AddressAccuracy,NumMosquitos,WnvPresent
count,10506.0,10506.0,10506.0,10506.0,10506.0,10506.0
mean,35.687797,41.841139,-87.699908,7.819532,12.853512,0.052446
std,24.339468,0.112742,0.096514,1.452921,16.133816,0.222936
min,10.0,41.644612,-87.930995,3.0,1.0,0.0
25%,12.0,41.732984,-87.76007,8.0,2.0,0.0
50%,33.0,41.846283,-87.694991,8.0,5.0,0.0
75%,52.0,41.95469,-87.627796,9.0,17.0,0.0
max,98.0,42.01743,-87.531635,9.0,50.0,1.0


In [12]:
spray.describe()

Unnamed: 0,Latitude,Longitude
count,14835.0,14835.0
mean,41.904828,-87.73669
std,0.104381,0.067292
min,41.713925,-88.096468
25%,41.785001,-87.794225
50%,41.940075,-87.727853
75%,41.980978,-87.694108
max,42.395983,-87.586727


In [13]:
weather.describe()

Unnamed: 0,Station,Tmax,Tmin,DewPoint,ResultSpeed,ResultDir
count,2944.0,2944.0,2944.0,2944.0,2944.0,2944.0
mean,1.5,76.166101,57.810462,53.45788,6.960666,17.494905
std,0.500085,11.46197,10.381939,10.675181,3.587527,10.063609
min,1.0,41.0,29.0,22.0,0.1,1.0
25%,1.0,69.0,50.0,46.0,4.3,7.0
50%,1.5,78.0,59.0,54.0,6.4,19.0
75%,2.0,85.0,66.0,62.0,9.2,25.0
max,2.0,104.0,83.0,75.0,24.1,36.0


In [14]:
# courtesy of Brian Collins

def date_separate(df):
    df = df.copy()
    df['Year'] = pd.DatetimeIndex(df['Date']).year
    df['Month'] = pd.DatetimeIndex(df['Date']).month
    df['Day'] = pd.DatetimeIndex(df['Date']).day
    return df

# make all the dataset applied to the function
train=date_separate(train)
weather=date_separate(weather)
spray=date_separate(spray)

##### Found some suspicious data
- 11 missing values in `weather['Tavg']` (should've `int`)
-

In [15]:
#weather[weather['Depart'] == 'M'] # 1472 sets
weather['Depart'].value_counts()

M      1472
 2       93
-1       84
-2       80
 5       77
 7       76
 1       76
 3       75
 0       74
-3       72
 4       71
 6       67
 8       59
-5       57
-4       56
-6       50
 9       47
10       46
-8       43
-7       30
11       28
12       28
-9       25
13       23
14       22
-10      22
15       15
16       12
-11      10
-12       8
17        7
18        6
-14       6
-13       5
20        4
19        4
-15       3
-16       3
22        3
21        2
-17       2
23        1
Name: Depart, dtype: int64

In [21]:
weather[weather['Tavg'] == 'M']

Unnamed: 0,Station,Date,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,Heat,Cool,...,SnowFall,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Year,Month,Day
7,2,2007-05-04,78,51,M,M,42,50,M,M,...,M,0.00,29.36,30.04,10.1,7,10.4,2007,5,4
505,2,2008-07-08,86,46,M,M,68,71,M,M,...,M,0.28,29.16,29.80,7.4,24,8.3,2008,7,8
675,2,2008-10-01,62,46,M,M,41,47,M,M,...,M,0.00,29.3,29.96,10.9,33,11.0,2008,10,1
1637,2,2011-07-22,100,71,M,M,70,74,M,M,...,M,0.14,29.23,29.86,3.8,10,8.2,2011,7,22
2067,2,2012-08-22,84,72,M,M,51,61,M,M,...,M,0.00,29.39,M,4.7,19,M,2012,8,22
2211,2,2013-05-02,71,42,M,M,39,45,M,M,...,M,0.00,29.51,30.17,15.8,2,16.1,2013,5,2
2501,2,2013-09-24,91,52,M,M,48,54,M,M,...,M,0.00,29.33,30.00,5.8,9,7.7,2013,9,24
2511,2,2013-09-29,84,53,M,M,48,54,M,M,...,M,0.22,29.36,30.01,6.3,36,7.8,2013,9,29
2525,2,2013-10-06,76,48,M,M,44,50,M,M,...,M,0.06,29.1,29.76,10.1,25,10.6,2013,10,6
2579,2,2014-05-02,80,47,M,M,43,47,M,M,...,M,0.04,29.1,29.79,10.7,23,11.9,2014,5,2


In [17]:
train['Species'].value_counts()

CULEX PIPIENS/RESTUANS    4752
CULEX RESTUANS            2740
CULEX PIPIENS             2699
CULEX TERRITANS            222
CULEX SALINARIUS            86
CULEX TARSALIS               6
CULEX ERRATICUS              1
Name: Species, dtype: int64

In [36]:
train['Trap'].value_counts()

T900     750
T115     542
T138     314
T002     185
T135     183
T054     163
T128     160
T151     156
T212     152
T090     151
T114     148
T158     147
T031     146
T048     146
T903     140
T008     139
T011     133
T209     130
T200     129
T073     128
T030     126
T003     122
T094     121
T028     117
T066     116
T074     115
T047     114
T102     112
T221     112
T159     111
        ... 
T141      22
T206      21
T154      21
T162      21
T219      21
T156      21
T075      20
T070      20
T071      19
T142      18
T097      17
T034      17
T050      17
T060      15
T001      15
T051      15
T044      15
T157      14
T005      14
T149      14
T072      13
T004      12
T054C      9
T078       8
T229       8
T238       7
T076       5
T094B      5
T237       3
T040       2
Name: Trap, Length: 136, dtype: int64

In [38]:
train.groupby('Species')['Trap'].value_counts()

Species          Trap
CULEX ERRATICUS  T900      1
CULEX PIPIENS    T115    337
                 T138    179
                 T900    131
                 T128     65
                 T135     49
                 T002     41
                 T200     36
                 T073     35
                 T212     34
                 T209     33
                 T031     32
                 T090     32
                 T103     31
                 T114     31
                 T903     31
                 T048     30
                 T151     30
                 T158     30
                 T086     29
                 T008     28
                 T011     28
                 T102     28
                 T221     27
                 T003     26
                 T030     26
                 T063     26
                 T061     25
                 T065     25
                 T094     25
                        ... 
CULEX TERRITANS  T033      1
                 T034      1
                 T037

In [40]:
weather['WetBulb'].value_counts()

63    135
65    131
59    129
61    123
64    121
62    118
67    117
66    113
60    111
69    107
57    105
70    102
55     99
71     99
68     98
58     95
56     95
52     78
46     72
72     71
53     69
47     61
51     57
54     56
73     56
50     55
45     53
48     51
49     50
74     50
44     45
42     35
40     31
43     31
75     20
41     19
38     17
39     15
36      9
76      8
35      8
77      8
37      5
33      5
34      5
M       4
32      1
78      1
Name: WetBulb, dtype: int64

In [42]:
weather['Heat'].value_counts()

0     1870
4       88
1       86
2       81
8       67
3       66
5       61
15      57
12      49
7       49
10      48
11      48
13      46
9       46
6       45
14      36
16      29
20      28
18      24
19      24
21      19
17      17
23      15
22      12
M       11
24       7
25       5
26       4
27       2
29       2
28       2
Name: Heat, dtype: int64

In [43]:
weather['Cool'].value_counts()

 0    1147
 8     138
12     117
 5     117
10     110
 6     109
 9     107
 7     104
 4     103
13     102
11     100
 3      99
14      98
 1      93
 2      89
15      84
16      55
17      48
18      34
19      21
21      16
20      16
M       11
22       9
24       4
26       4
23       4
25       2
28       1
27       1
29       1
Name: Cool, dtype: int64

In [44]:
# can we break it by Regex?

weather['CodeSum'].value_counts()

                        1609
RA                       296
RA BR                    238
BR                       110
TSRA RA BR                92
BR HZ                     81
RA DZ BR                  65
TSRA RA                   43
HZ                        39
RA BR HZ                  38
TSRA                      34
RA DZ                     22
TSRA BR                   21
TS TSRA RA BR             19
RA HZ                     16
TS RA                     13
TSRA RA BR HZ             12
DZ BR                     10
TS TSRA BR                10
TS                        10
DZ BR HZ                   9
DZ                         8
TS RA BR                   8
TSRA BR HZ                 8
TS TSRA                    7
TS TSRA BR HZ              7
TS TSRA RA                 7
RA DZ BR HZ                7
TSRA HZ                    4
TSRA RA BR VCTS            4
                        ... 
RA BCFG BR                 1
TSRA BR HZ FU              1
FG+ BCFG BR                1
BR VCTS       

In [45]:
weather['Depth'].value_counts()

0    1472
M    1472
Name: Depth, dtype: int64

In [46]:
# Since all the data is missing. we can omit!
weather['Water1'].value_counts()

M    2944
Name: Water1, dtype: int64

In [47]:
# It also seems irrelavant. no meaningful values
weather['SnowFall'].value_counts()

M      1472
0.0    1459
  T      12
0.1       1
Name: SnowFall, dtype: int64

In [64]:
# 318 values are just trace rainfall

weather.PrecipTotal=weather.PrecipTotal.str.strip()
weather[weather.PrecipTotal == 'T']

Unnamed: 0,Station,Date,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,Heat,Cool,...,SnowFall,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Year,Month,Day
6,1,2007-05-04,66,49,58,4,41,50,7,0,...,0.0,T,29.31,30.05,10.4,8,10.8,2007,5,4
8,1,2007-05-05,66,53,60,5,38,49,5,0,...,0.0,T,29.40,30.10,11.7,7,12.0,2007,5,5
9,2,2007-05-05,66,54,60,M,39,50,5,0,...,M,T,29.46,30.09,11.2,7,11.5,2007,5,5
12,1,2007-05-07,83,47,65,10,41,54,0,0,...,0.0,T,29.38,30.12,8.6,18,10.5,2007,5,7
15,2,2007-05-08,80,60,70,M,57,63,0,5,...,M,T,29.36,30.02,2.5,8,5.4,2007,5,8
47,2,2007-05-24,88,66,77,M,53,63,0,12,...,M,T,29.47,30.10,13.8,20,15.2,2007,5,24
48,1,2007-05-25,70,56,63,1,44,53,2,0,...,0.0,T,29.51,30.24,4.9,5,8.8,2007,5,25
54,1,2007-05-28,79,50,65,2,47,56,0,0,...,0.0,T,29.43,30.15,4.8,10,5.3,2007,5,28
65,2,2007-06-02,81,68,75,M,62,67,0,10,...,M,T,29.10,29.76,7.5,21,8.2,2007,6,2
73,2,2007-06-06,73,48,61,M,47,55,4,0,...,M,T,29.13,29.82,9.6,15,10.6,2007,6,6


In [66]:
# 0.00 rainfall - 1,577 values
weather[weather.PrecipTotal == '0.00']

Unnamed: 0,Station,Date,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,Heat,Cool,...,SnowFall,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Year,Month,Day
0,1,2007-05-01,83,50,67,14,51,56,0,2,...,0.0,0.00,29.10,29.82,1.7,27,9.2,2007,5,1
1,2,2007-05-01,84,52,68,M,51,57,0,3,...,M,0.00,29.18,29.82,2.7,25,9.6,2007,5,1
2,1,2007-05-02,59,42,51,-3,42,47,14,0,...,0.0,0.00,29.38,30.09,13.0,4,13.4,2007,5,2
3,2,2007-05-02,60,43,52,M,42,47,13,0,...,M,0.00,29.44,30.08,13.3,2,13.4,2007,5,2
4,1,2007-05-03,66,46,56,2,40,48,9,0,...,0.0,0.00,29.39,30.12,11.7,7,11.9,2007,5,3
5,2,2007-05-03,67,48,58,M,40,50,7,0,...,M,0.00,29.46,30.12,12.9,6,13.2,2007,5,3
7,2,2007-05-04,78,51,M,M,42,50,M,M,...,M,0.00,29.36,30.04,10.1,7,10.4,2007,5,4
10,1,2007-05-06,68,49,59,4,30,46,6,0,...,0.0,0.00,29.57,30.29,14.4,11,15.0,2007,5,6
11,2,2007-05-06,68,52,60,M,30,46,5,0,...,M,0.00,29.62,30.28,13.8,10,14.5,2007,5,6
13,2,2007-05-07,84,50,67,M,39,53,0,2,...,M,0.00,29.44,30.12,8.5,17,9.9,2007,5,7


In [10]:
# only select `object` columns
train_cats = train.select_dtypes(include=['object']).columns
weather_cats = weather.select_dtypes(include=['object']).columns
spray_cats = spray.select_dtypes(include=['object']).columns

print(train_cats,'\n')
print(weather_cats,'\n')
print(spray_cats,'\n')

Index(['Date', 'Address', 'Species', 'Street', 'Trap',
       'AddressNumberAndStreet'],
      dtype='object') 

Index(['Date', 'Tavg', 'Depart', 'WetBulb', 'Heat', 'Cool', 'Sunrise',
       'Sunset', 'CodeSum', 'Depth', 'Water1', 'SnowFall', 'PrecipTotal',
       'StnPressure', 'SeaLevel', 'AvgSpeed'],
      dtype='object') 

Index(['Date', 'Time'], dtype='object') 



In [25]:
train['Trap'].value_counts()

T900     750
T115     542
T138     314
T002     185
T135     183
T054     163
T128     160
T151     156
T212     152
T090     151
T114     148
T158     147
T031     146
T048     146
T903     140
T008     139
T011     133
T209     130
T200     129
T073     128
T030     126
T003     122
T094     121
T028     117
T066     116
T074     115
T047     114
T102     112
T221     112
T159     111
        ... 
T141      22
T206      21
T154      21
T162      21
T219      21
T156      21
T075      20
T070      20
T071      19
T142      18
T097      17
T034      17
T050      17
T060      15
T001      15
T051      15
T044      15
T157      14
T005      14
T149      14
T072      13
T004      12
T054C      9
T078       8
T229       8
T238       7
T076       5
T094B      5
T237       3
T040       2
Name: Trap, Length: 136, dtype: int64

In [29]:
train.head(1)

Unnamed: 0,Date,Address,Species,Block,Street,Trap,AddressNumberAndStreet,Latitude,Longitude,AddressAccuracy,NumMosquitos,WnvPresent
0,2007-05-29,"4100 North Oak Park Avenue, Chicago, IL 60634,...",CULEX PIPIENS/RESTUANS,41,N OAK PARK AVE,T002,"4100 N OAK PARK AVE, Chicago, IL",41.95469,-87.800991,9,1,0


In [None]:
weather.describe

In [55]:
weather.head(5).T

Unnamed: 0,0,1,2,3,4
Station,1,2,1,2,1
Date,2007-05-01,2007-05-01,2007-05-02,2007-05-02,2007-05-03
Tmax,83,84,59,60,66
Tmin,50,52,42,43,46
Tavg,67,68,51,52,56
Depart,14,M,-3,M,2
DewPoint,51,51,42,42,40
WetBulb,56,57,47,47,48
Heat,0,0,14,13,9
Cool,2,3,0,0,0


In [56]:
spray.head().T

Unnamed: 0,0,1,2,3,4
Date,2011-08-29,2011-08-29,2011-08-29,2011-08-29,2011-08-29
Time,6:56:58 PM,6:57:08 PM,6:57:18 PM,6:57:28 PM,6:57:38 PM
Latitude,42.3916,42.3913,42.391,42.3906,42.3904
Longitude,-88.0892,-88.0892,-88.0892,-88.0892,-88.0889


In [57]:
train.head().T

Unnamed: 0,0,1,2,3,4
Date,2007-05-29,2007-05-29,2007-05-29,2007-05-29,2007-05-29
Address,"4100 North Oak Park Avenue, Chicago, IL 60634,...","4100 North Oak Park Avenue, Chicago, IL 60634,...","6200 North Mandell Avenue, Chicago, IL 60646, USA","7900 West Foster Avenue, Chicago, IL 60656, USA","7900 West Foster Avenue, Chicago, IL 60656, USA"
Species,CULEX PIPIENS/RESTUANS,CULEX RESTUANS,CULEX RESTUANS,CULEX PIPIENS/RESTUANS,CULEX RESTUANS
Block,41,41,62,79,79
Street,N OAK PARK AVE,N OAK PARK AVE,N MANDELL AVE,W FOSTER AVE,W FOSTER AVE
Trap,T002,T002,T007,T015,T015
AddressNumberAndStreet,"4100 N OAK PARK AVE, Chicago, IL","4100 N OAK PARK AVE, Chicago, IL","6200 N MANDELL AVE, Chicago, IL","7900 W FOSTER AVE, Chicago, IL","7900 W FOSTER AVE, Chicago, IL"
Latitude,41.9547,41.9547,41.995,41.9741,41.9741
Longitude,-87.801,-87.801,-87.7693,-87.8248,-87.8248
AddressAccuracy,9,9,9,8,8


In [31]:
spray.head(1)

Unnamed: 0,Date,Time,Latitude,Longitude
0,2011-08-29,6:56:58 PM,42.391623,-88.089163


In [53]:
weather.groupby('Station')['Tmax'].mean()

Station
1    75.978940
2    76.353261
Name: Tmax, dtype: float64

In [47]:
weather.head().T

Unnamed: 0,0,1,2,3,4
Station,1,2,1,2,1
Date,2007-05-01,2007-05-01,2007-05-02,2007-05-02,2007-05-03
Tmax,83,84,59,60,66
Tmin,50,52,42,43,46
Tavg,67,68,51,52,56
Depart,14,M,-3,M,2
DewPoint,51,51,42,42,40
WetBulb,56,57,47,47,48
Heat,0,0,14,13,9
Cool,2,3,0,0,0


In [46]:
weather[['Sunrise']].info

<bound method DataFrame.info of      Sunrise
0       0448
1          -
2       0447
3          -
4       0446
5          -
6       0444
7          -
8       0443
9          -
10      0442
11         -
12      0441
13         -
14      0439
15         -
16      0438
17         -
18      0437
19         -
20      0436
21         -
22      0435
23         -
24      0434
25         -
26      0433
27         -
28      0432
29         -
...      ...
2914    0606
2915       -
2916    0607
2917       -
2918    0608
2919       -
2920    0610
2921       -
2922    0611
2923       -
2924    0612
2925       -
2926    0613
2927       -
2928    0614
2929       -
2930    0616
2931       -
2932    0617
2933       -
2934    0618
2935       -
2936    0619
2937       -
2938    0620
2939       -
2940    0622
2941       -
2942    0623
2943       -

[2944 rows x 1 columns]>

In [37]:
weather[['Sunset']]

Unnamed: 0,Sunset
0,1849
1,-
2,1850
3,-
4,1851
5,-
6,1852
7,-
8,1853
9,-


In [38]:
train.isnull().sum()

Date                      0
Address                   0
Species                   0
Block                     0
Street                    0
Trap                      0
AddressNumberAndStreet    0
Latitude                  0
Longitude                 0
AddressAccuracy           0
NumMosquitos              0
WnvPresent                0
dtype: int64

In [39]:
weather.isnull().sum()

Station        0
Date           0
Tmax           0
Tmin           0
Tavg           0
Depart         0
DewPoint       0
WetBulb        0
Heat           0
Cool           0
Sunrise        0
Sunset         0
CodeSum        0
Depth          0
Water1         0
SnowFall       0
PrecipTotal    0
StnPressure    0
SeaLevel       0
ResultSpeed    0
ResultDir      0
AvgSpeed       0
dtype: int64

In [40]:
spray.isnull().sum()

Date           0
Time         584
Latitude       0
Longitude      0
dtype: int64