In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf


In [2]:
## Import up sound alert dependencies
from IPython.display import Audio, display

# def allDone():
#   display(Audio(url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav', autoplay=True))
# ## Insert whatever audio file you want above

In [3]:
pd.options.display.max_columns = 47

In [4]:
data = pd.read_csv('US_Accidents_Dec20_Updated.csv')

In [5]:
def accidents_ETL(accidents):
    
    #drop unnecessary columns
    dropthese = ['ID', 'End_Lat', 'End_Lng', 'End_Time',    'Description', 'Distance(mi)', 'Number', 'Street', 'Country', 'Timezone', 'Airport_Code', 'Weather_Timestamp', 'Start_Lat', 'Start_Lng', 'Nautical_Twilight', 'Astronomical_Twilight', 'Wind_Chill(F)', 'Amenity', 'Sunrise_Sunset']
    accidents = accidents.drop(columns = dropthese, axis = 1)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Remname Civil Twilight
    accidents = accidents.rename(columns={'Civil_Twilight': 'Day/Night'})

    #-------------------------------------------------------#
    #-------------------------------------------------------#

    #modify Start_Time to datetime so we can extract year, month, day, hour
    accidents['Start_Time'] = pd.to_datetime(accidents['Start_Time'])

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #extracting year, month, day, hour
    accidents['Year'] = pd.DatetimeIndex(accidents['Start_Time']).year
    accidents['Month'] = pd.DatetimeIndex(accidents['Start_Time']).month
    accidents['Day'] = pd.DatetimeIndex(accidents['Start_Time']).day
    accidents['Hour'] = pd.DatetimeIndex(accidents['Start_Time']).hour
    
    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #drop unnecessary Start_Time column
    accidents = accidents.drop(['Start_Time'], axis=1)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #remove expanded zip code value
    accidents2 = accidents['Zipcode'].str.split('-', expand=True)
    accidents['Zipcode'] = accidents2[0]

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Combining City/State and County/State to prevent wrong city or county aggregation
    accidents['City_State'] = accidents['City'].astype(str) + ',' + accidents['State'].astype(str)
    accidents['County_State'] = accidents['County'].astype(str) + ',' + accidents['State'].astype(str)
    accidents['City'] = accidents['City_State']
    accidents['County'] = accidents['County_State']
    accidents = accidents.drop(['City_State', 'County_State'], axis=1)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Replacing wind directions to cardinal 8 directions
    accidents['Wind_Direction'].replace('CALM', 'Calm', inplace=True)
    accidents['Wind_Direction'].replace(['ENE', 'NNE'], 'NE', inplace=True)
    accidents['Wind_Direction'].replace(['ESE', 'SSE'], 'SE', inplace=True)
    accidents['Wind_Direction'].replace(['WNW', 'NNW'], 'NW', inplace=True)
    accidents['Wind_Direction'].replace(['WSW', 'SSW'], 'SW', inplace=True)
    accidents['Wind_Direction'].replace('North', 'N', inplace=True)
    accidents['Wind_Direction'].replace('East', 'E', inplace=True)
    accidents['Wind_Direction'].replace('South', 'S', inplace=True)
    accidents['Wind_Direction'].replace('West', 'W', inplace=True)
    accidents['Wind_Direction'].replace('VAR', 'Variable', inplace=True)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Chaning Weather_Condition NaN to "Unknown"
    accidents['Weather_Condition'] = accidents['Weather_Condition'].fillna('Unkown')

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Chaning Temperature NaN to "Unknown"
    accidents['Temperature(F)'] = accidents['Temperature(F)'].fillna('Unkown')

    #-------------------------------------------------------#
    #-------------------------------------------------------#

    #Binning Humidity
    #Chaning Humidity NaN to "0"
    accidents['Humidity(%)'] = accidents['Humidity(%)'].fillna('101')
    #change column to numerical
    accidents['Humidity(%)'] = accidents['Humidity(%)'].astype(int)
    #Binning Humidity
    bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 101]
    #create labels for the bins
    group_labels = ["0-9%", "10-19%", "20-29%", "30-39%", "40-49%", "50-59%", "60-69%", "70-79%", "80-89%", "90-100%", "Unknown"] 
    #slice the Humidity data and place into bins
    accidents['Humidity(%)'] = pd.cut(accidents['Humidity(%)'], bins, labels=group_labels)
  
    #-------------------------------------------------------#
    #-------------------------------------------------------#

    #Chaning Pressure NaN to "Unknown"
    accidents['Pressure(in)'] = accidents['Pressure(in)'].fillna('Unkown')

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Dropping Day/Night NaN values - because daylight hours change and cannot be reliably determined by time
    noDayNight = accidents[accidents['Day/Night'].isnull()]
    accidents = accidents.drop(noDayNight.index)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Filling in missing Zipcodes
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'St. Petersburg,FL'), '33713', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'District 4 Kent Island,MD'), '21666', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'Ross Valley,CA'), '94939', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'Springville-Mapleton,UT'), '84663', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'West Contra Costa,CA'), '94530', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'San Mateo,CA'), '94403', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'East Tehama,CA'), '96090', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'Southeast Marin,CA'), '94956', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'District 5,MD'), '20659', accidents.Zipcode)
    accidents['Zipcode'] = np.where((accidents['Zipcode'].isnull()) & (accidents['City'] == 'Avalon-Mulat,FL'), '32583', accidents.Zipcode)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Removing remaining NaN Zipcodes:
    no_zipcode = accidents[accidents['Zipcode'].isnull()]
    accidents = accidents.drop(no_zipcode.index)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Correcting Precipitation(in) NaN to 0 because if there was precipitation it would have likely been recorded. 
    accidents['Precipitation(in)'] = accidents['Precipitation(in)'].fillna(0.00)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Correcting Wind_Speed(mph) NaN to 0 because if there was wind it would have likely been recorded. 
    accidents['Wind_Speed(mph)'] = accidents['Wind_Speed(mph)'].fillna(0.0)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 

    #Changing unknown Wind_Direction with Wind_Speed(mi)>0 to "Variable"
    accidents['Wind_Direction'] = np.where((accidents['Wind_Direction'].isnull()) & (accidents['Wind_Speed(mph)'] > 0), 'Variable', accidents.Wind_Direction)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Changing Wind_Direction NaN with Wind_Speed(mi) of 0 to "Calm"
    accidents['Wind_Direction'] = np.where((accidents['Wind_Direction'].isnull()) & (accidents['Wind_Speed(mph)'] == 0), 'Calm', accidents.Wind_Direction)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Change Visibilty(mi) NaN values to the median of 10.0
    accidents['Visibility(mi)'] = accidents['Visibility(mi)'].fillna(10.00)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #REMOVING OUTLIERS:

    #eliminating Wind_Speed outliers
    high_wind = accidents[accidents['Wind_Speed(mph)'] > 100]
    accidents = accidents.drop(high_wind.index)

    #eliminating Precipitation(in) outliers
    high_rain = accidents[accidents['Precipitation(in)'] > 13]
    accidents = accidents.drop(high_rain.index)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Weather Consolidtation
    #Clear
    accidents['Weather_Condition'].replace('Clear', 'Fair', inplace=True)
    accidents['Weather_Condition'].replace('N/A Precipitation', 'Fair', inplace=True)

    #Unknown
    accidents['Weather_Condition'].replace('N/A Preciptiation', 'Unknown', inplace=True)

    #Cloudy
    accidents['Weather_Condition'].replace('Mostly Cloudy', 'Mostly_Cloudy', inplace=True)
    accidents['Weather_Condition'].replace('Partly Cloudy', 'Partly_Cloudy', inplace=True)
    accidents['Weather_Condition'].replace('Overcast', 'Mostly_Cloudy', inplace=True)
    accidents['Weather_Condition'].replace('Scattered Clouds', 'Partly_Cloudy', inplace=True)

    #Rain
    accidents['Weather_Condition'].replace('Light Rain', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Rain', 'Heavy_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Light Rain Shower', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Light Rain Showers', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Light Drizzle', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Light Freezing Drizzle', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Light Freezing Rain', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Drizzle', 'Light_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Drizzle', 'Rain', inplace=True)
    accidents['Weather_Condition'].replace('Showers in the Vicinity', 'Rain', inplace=True)
    accidents['Weather_Condition'].replace('Rain Showers', 'Rain', inplace=True)
    accidents['Weather_Condition'].replace('Rain Shower', 'Rain', inplace=True)
    accidents['Weather_Condition'].replace('Freezing Rain', 'Rain', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Rain Shower', 'Heavy_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Freezing Rain', 'Heavy_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Freezing Drizzle', 'Rain', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Rain Showers', 'Heavy_Rain', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Freezing Drizzle', 'Rain', inplace=True)

    #Fog
    accidents['Weather_Condition'].replace('Patches of Fog', 'Fog', inplace=True)
    accidents['Weather_Condition'].replace('Light Freezing Fog', 'Fog', inplace=True)
    accidents['Weather_Condition'].replace('Partial Fog', 'Fog', inplace=True)
    accidents['Weather_Condition'].replace('Light Fog', 'Fog', inplace=True)
    accidents['Weather_Condition'].replace('Shallow Fog', 'Shallow_Fog', inplace=True)

    #Smoke/Haze
    accidents['Weather_Condition'].replace('Smoke', 'Smoke/Haze', inplace=True)
    accidents['Weather_Condition'].replace('Haze', 'Smoke/Haze', inplace=True)
    accidents['Weather_Condition'].replace('Light Haze', 'Smoke/Haze', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Smoke', 'Smoke/Haze', inplace=True)

    #Thunderstorms
    accidents['Weather_Condition'].replace('T-Storm', 'Thunderstorm', inplace=True)
    accidents['Weather_Condition'].replace('Light Thunderstorms and Rain', 'Thunderstorm', inplace=True)
    accidents['Weather_Condition'].replace('Thunder in the Vicinity', 'Thunder', inplace=True)
    accidents['Weather_Condition'].replace('Light Rain with Thunder', 'Thunderstorm', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Thunderstorms and Rain', 'Thunderstorm', inplace=True)
    accidents['Weather_Condition'].replace('Heavy T-Storm', 'Thunderstorm', inplace=True)
    accidents['Weather_Condition'].replace('Thunderstorms and Rain', 'Thunderstorm', inplace=True)
    accidents['Weather_Condition'].replace('Light Thunderstorms and Snow', 'Thunderstorms/Snow', inplace=True)
    accidents['Weather_Condition'].replace('Thunderstorms and Snow', 'Thunderstorms/Snow', inplace=True)
    accidents['Weather_Condition'].replace('Light Thunderstorm', 'Thunderstorm', inplace=True)

    #Snow
    accidents['Weather_Condition'].replace('Light Snow', 'Light_Snow', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Snow', 'Heavy_Snow', inplace=True)
    accidents['Weather_Condition'].replace('Light Snow Showers', 'Snow', inplace=True)
    accidents['Weather_Condition'].replace('Snow Grains', 'Snow', inplace=True)
    accidents['Weather_Condition'].replace('Light Blowing Snow', 'Snow/Windy', inplace=True)
    accidents['Weather_Condition'].replace('Light Snow Grains', 'Snow', inplace=True)
    accidents['Weather_Condition'].replace('Low Drifting Snow', 'Snow', inplace=True)
    accidents['Weather_Condition'].replace('Snow Showers', 'Snow', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Blowing Snow', 'Heavy Snow/Windy', inplace=True)
    accidents['Weather_Condition'].replace('Light Snow Shower', 'Snow', inplace=True)
    accidents['Weather_Condition'].replace('Drifting Snow', 'Snow/Windy', inplace=True)

    #Hail
    accidents['Weather_Condition'].replace('Light Ice Pellets', 'Hail', inplace=True)
    accidents['Weather_Condition'].replace('Ice Pellets', 'Hail', inplace=True)
    accidents['Weather_Condition'].replace('Small Hail', 'Hail', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Ice Pellets', 'Hail', inplace=True)
    accidents['Weather_Condition'].replace('Light Hail', 'Hail', inplace=True)

    #Sleet
    accidents['Weather_Condition'].replace('Light Sleet', 'Sleet', inplace=True)
    accidents['Weather_Condition'].replace('Heavy Sleet', 'Sleet', inplace=True)

    #Dust
    accidents['Weather_Condition'].replace('Sand', 'Dust', inplace=True)
    accidents['Weather_Condition'].replace('Widespread Dust', 'Dust', inplace=True)

    #Tornado
    accidents['Weather_Condition'].replace('Tornado', 'Funnel_Cloud', inplace=True) 
    accidents['Weather_Condition'].replace('Funnel Cloud', 'Funnel_Cloud', inplace=True)    

    #Dust Whirls
    accidents['Weather_Condition'].replace('Dust Whirls', 'Dust_Whirls', inplace=True)

    #Wintry Mix
    accidents['Weather_Condition'].replace('Wintry Mix', 'Wintry_Mix', inplace=True) 

    #Wind
    accidents = accidents.assign(Weather_Condition_Wind = 0)

    #Volcanic Ash
    accidents['Weather_Condition'].replace('Volcanic Ash', 'Volcanic_Ash', inplace=True)

    #-------------------------------------------------------#
    #-------------------------------------------------------#
               
    #Changing Side to R=0 and L=1
    accidents['Side'].replace('R', '0', inplace=True)
    accidents['Side'].replace('L', '1', inplace=True)


    #-------------------------------------------------------#
    #-------------------------------------------------------#
 
    #Converting Bool columns to integers to prepare for scaling and ML modeling
    bool_cols = accidents.dtypes[accidents.dtypes == 'bool'].index.tolist()
    bool_int = accidents[bool_cols].astype(int)
    encoding = accidents.copy()
    encoding = accidents.drop(bool_int,1)
    #encoding = encoding.merge(bool_int, left_index=True, right_index=True)
    accidents = encoding.merge(bool_int, left_index=True, right_index=True)


    #Encoding Obj columns using OneHotEncoder


    return(accidents)

In [6]:
accidents = accidents_ETL(data)
accidents

Unnamed: 0,Severity,Side,City,County,State,Zipcode,Temperature(F),Humidity(%),Pressure(in),Visibility(mi),Wind_Direction,Wind_Speed(mph),Precipitation(in),Weather_Condition,Day/Night,Year,Month,Day,Hour,Weather_Condition_Wind,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop
0,2,0,"Greenville,SC","Greenville,SC",SC,29607,76.0,50-59%,28.91,10.0,N,7.0,0.0,Fair,Day,2019,5,21,8,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,"Charlotte,NC","Mecklenburg,NC",NC,28270,76.0,60-69%,29.3,10.0,Variable,3.0,0.0,Cloudy,Day,2019,10,7,17,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,"Los Gatos,CA","Santa Clara,CA",CA,95033,51.0,70-79%,30.17,10.0,W,6.0,0.0,Fair,Night,2020,12,13,21,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2,0,"Carson City,NV","Douglas,NV",NV,89705,53.6,10-19%,30.16,10.0,SW,4.6,0.0,Fair,Day,2018,4,17,16,0,0,0,0,0,0,0,0,0,0,0,1,0
4,3,0,"Fort Lauderdale,FL","Broward,FL",FL,33324,84.2,80-89%,29.92,10.0,SE,13.8,0.0,Mostly_Cloudy,Day,2016,8,31,17,0,0,0,0,1,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2906605,2,1,"Houston,TX","Harris,TX",TX,77018,84.2,60-69%,30.02,9.0,Variable,5.8,0.0,Fair,Day,2018,6,28,8,0,0,0,0,0,0,0,0,0,1,0,0,0
2906606,2,0,"Colton,CA","San Bernardino,CA",CA,92324,46.9,70-79%,30.14,10.0,Calm,0.0,0.0,Fair,Night,2019,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0
2906607,2,1,"Miami,FL","Miami-Dade,FL",FL,33173,76.0,80-89%,30.0,10.0,NW,16.0,0.0,Mostly_Cloudy,Day,2020,11,23,12,0,0,0,0,0,0,0,0,0,0,0,0,0
2906608,2,0,"Salt Lake City,UT","Salt Lake,UT",UT,84129,27.0,80-89%,25.81,10.0,SE,8.0,0.0,Cloudy,Night,2019,12,29,22,0,0,0,0,0,0,0,0,0,0,0,0,0


In [7]:
pd.options.display.max_rows = None
print(accidents.dtypes)
pd.options.display.max_rows = 20

Severity                     int64
Side                        object
City                        object
County                      object
State                       object
Zipcode                     object
Temperature(F)              object
Humidity(%)               category
Pressure(in)                object
Visibility(mi)             float64
Wind_Direction              object
Wind_Speed(mph)            float64
Precipitation(in)          float64
Weather_Condition           object
Day/Night                   object
Year                         int64
Month                        int64
Day                          int64
Hour                         int64
Weather_Condition_Wind       int64
Bump                         int64
Crossing                     int64
Give_Way                     int64
Junction                     int64
No_Exit                      int64
Railway                      int64
Roundabout                   int64
Station                      int64
Stop                

In [8]:
pd.options.display.max_rows = None
test = accidents['Weather_Condition']

desc = test.describe()
nan = test.isnull().sum()
counts = test.value_counts(dropna=False)

print(f'Stats: {desc}')
print(f'NaN: {nan}')
print(f'Value Counts: {counts}')
pd.options.display.max_rows = 20

Stats: count     2906065
unique         77
top          Fair
freq      1192074
Name: Weather_Condition, dtype: object
NaN: 0
Value Counts: Fair                                   1192074
Mostly_Cloudy                           623174
Partly_Cloudy                           395915
Cloudy                                  245043
Light_Rain                              154686
Unkown                                   71479
Light_Snow                               39939
Smoke/Haze                               39033
Fog                                      36033
Rain                                     34130
Thunderstorm                             16437
Heavy_Rain                               12342
Fair / Windy                              9099
Thunder                                   5143
Mostly Cloudy / Windy                     5098
Cloudy / Windy                            4769
Snow                                      4617
Partly Cloudy / Windy                     3053
Light Rain / Wi

In [9]:
pd.options.display.max_columns = 150
#Encoding Weather_Condition with get_dummies to prevent all object columns from converting at once.H
accidents = pd.get_dummies(accidents, columns=['Weather_Condition'])
accidents

Unnamed: 0,Severity,Side,City,County,State,Zipcode,Temperature(F),Humidity(%),Pressure(in),Visibility(mi),Wind_Direction,Wind_Speed(mph),Precipitation(in),Day/Night,Year,Month,Day,Hour,Weather_Condition_Wind,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Weather_Condition_Blowing Dust,Weather_Condition_Blowing Dust / Windy,Weather_Condition_Blowing Sand,Weather_Condition_Blowing Snow,Weather_Condition_Blowing Snow / Windy,Weather_Condition_Cloudy,Weather_Condition_Cloudy / Windy,Weather_Condition_Drizzle / Windy,Weather_Condition_Drizzle and Fog,Weather_Condition_Dust,Weather_Condition_Dust_Whirls,Weather_Condition_Fair,Weather_Condition_Fair / Windy,Weather_Condition_Fog,Weather_Condition_Fog / Windy,Weather_Condition_Freezing Rain / Windy,Weather_Condition_Funnel_Cloud,Weather_Condition_Hail,Weather_Condition_Haze / Windy,Weather_Condition_Heavy Rain / Windy,Weather_Condition_Heavy Snow / Windy,Weather_Condition_Heavy Snow with Thunder,Weather_Condition_Heavy Snow/Windy,Weather_Condition_Heavy T-Storm / Windy,Weather_Condition_Heavy Thunderstorms and Snow,Weather_Condition_Heavy Thunderstorms with Small Hail,Weather_Condition_Heavy_Rain,Weather_Condition_Heavy_Snow,Weather_Condition_Light Drizzle / Windy,Weather_Condition_Light Freezing Rain / Windy,Weather_Condition_Light Rain / Windy,Weather_Condition_Light Rain Shower / Windy,Weather_Condition_Light Sleet / Windy,Weather_Condition_Light Snow / Windy,Weather_Condition_Light Snow and Sleet,Weather_Condition_Light Snow and Sleet / Windy,Weather_Condition_Light Snow with Thunder,Weather_Condition_Light_Rain,Weather_Condition_Light_Snow,Weather_Condition_Mist,Weather_Condition_Mist / Windy,Weather_Condition_Mostly Cloudy / Windy,Weather_Condition_Mostly_Cloudy,Weather_Condition_Partly Cloudy / Windy,Weather_Condition_Partly_Cloudy,Weather_Condition_Patches of Fog / Windy,Weather_Condition_Rain,Weather_Condition_Rain / Windy,Weather_Condition_Rain and Sleet,Weather_Condition_Sand / Dust Whirls Nearby,Weather_Condition_Sand / Dust Whirlwinds,Weather_Condition_Shallow_Fog,Weather_Condition_Sleet,Weather_Condition_Sleet / Windy,Weather_Condition_Smoke / Windy,Weather_Condition_Smoke/Haze,Weather_Condition_Snow,Weather_Condition_Snow / Windy,Weather_Condition_Snow and Sleet,Weather_Condition_Snow and Sleet / Windy,Weather_Condition_Snow/Windy,Weather_Condition_Squalls,Weather_Condition_Squalls / Windy,Weather_Condition_T-Storm / Windy,Weather_Condition_Thunder,Weather_Condition_Thunder / Windy,Weather_Condition_Thunder / Wintry Mix,Weather_Condition_Thunder / Wintry Mix / Windy,Weather_Condition_Thunder and Hail,Weather_Condition_Thunder and Hail / Windy,Weather_Condition_Thunderstorm,Weather_Condition_Thunderstorms/Snow,Weather_Condition_Unkown,Weather_Condition_Volcanic_Ash,Weather_Condition_Widespread Dust / Windy,Weather_Condition_Wintry Mix / Windy,Weather_Condition_Wintry_Mix
0,2,0,"Greenville,SC","Greenville,SC",SC,29607,76.0,50-59%,28.91,10.0,N,7.0,0.0,Day,2019,5,21,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,"Charlotte,NC","Mecklenburg,NC",NC,28270,76.0,60-69%,29.3,10.0,Variable,3.0,0.0,Day,2019,10,7,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,"Los Gatos,CA","Santa Clara,CA",CA,95033,51.0,70-79%,30.17,10.0,W,6.0,0.0,Night,2020,12,13,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2,0,"Carson City,NV","Douglas,NV",NV,89705,53.6,10-19%,30.16,10.0,SW,4.6,0.0,Day,2018,4,17,16,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,0,"Fort Lauderdale,FL","Broward,FL",FL,33324,84.2,80-89%,29.92,10.0,SE,13.8,0.0,Day,2016,8,31,17,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2906605,2,1,"Houston,TX","Harris,TX",TX,77018,84.2,60-69%,30.02,9.0,Variable,5.8,0.0,Day,2018,6,28,8,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2906606,2,0,"Colton,CA","San Bernardino,CA",CA,92324,46.9,70-79%,30.14,10.0,Calm,0.0,0.0,Night,2019,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2906607,2,1,"Miami,FL","Miami-Dade,FL",FL,33173,76.0,80-89%,30.0,10.0,NW,16.0,0.0,Day,2020,11,23,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2906608,2,0,"Salt Lake City,UT","Salt Lake,UT",UT,84129,27.0,80-89%,25.81,10.0,SE,8.0,0.0,Night,2019,12,29,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
print(accidents['Weather_Condition_Wind'].value_counts())
print(accidents['Weather_Condition_Smoke/Haze'].value_counts())
print(accidents['Weather_Condition_Snow'].value_counts())
print(accidents['Weather_Condition_Light_Snow'].value_counts())
print(accidents['Weather_Condition_Heavy_Snow'].value_counts())
print(accidents['Weather_Condition_Rain'].value_counts())
print(accidents['Weather_Condition_Light_Rain'].value_counts())
print(accidents['Weather_Condition_Heavy_Rain'].value_counts())
print(accidents['Weather_Condition_Hail'].value_counts())
print(accidents['Weather_Condition_Sleet'].value_counts())
print(accidents['Weather_Condition_Cloudy'].value_counts())
print(accidents['Weather_Condition_Partly_Cloudy'].value_counts())
print(accidents['Weather_Condition_Mostly_Cloudy'].value_counts())
print(accidents['Weather_Condition_Funnel_Cloud'].value_counts())
print(accidents['Weather_Condition_Wintry_Mix'].value_counts())
print(accidents['Weather_Condition_Thunderstorm'].value_counts())

0    2906065
Name: Weather_Condition_Wind, dtype: int64
0    2867032
1      39033
Name: Weather_Condition_Smoke/Haze, dtype: int64
0    2901448
1       4617
Name: Weather_Condition_Snow, dtype: int64
0    2866126
1      39939
Name: Weather_Condition_Light_Snow, dtype: int64
0    2904833
1       1232
Name: Weather_Condition_Heavy_Snow, dtype: int64
0    2871935
1      34130
Name: Weather_Condition_Rain, dtype: int64
0    2751379
1     154686
Name: Weather_Condition_Light_Rain, dtype: int64
0    2893723
1      12342
Name: Weather_Condition_Heavy_Rain, dtype: int64
0    2905804
1        261
Name: Weather_Condition_Hail, dtype: int64
0    2906023
1         42
Name: Weather_Condition_Sleet, dtype: int64
0    2661022
1     245043
Name: Weather_Condition_Cloudy, dtype: int64
0    2510150
1     395915
Name: Weather_Condition_Partly_Cloudy, dtype: int64
0    2282891
1     623174
Name: Weather_Condition_Mostly_Cloudy, dtype: int64
0    2906052
1         13
Name: Weather_Condition_Funnel_Cloud, d

In [11]:
#iterate the df and expand the Weather_Condition columns that express more than one condition. Then the combined columns will be dropped.
for index, row in accidents.iterrows():
    if accidents.loc[index, 'Weather_Condition_Smoke / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
        accidents.loc[index, 'Weather_Condition_Smoke/Haze'] = 1
       
    elif accidents.loc[index, 'Weather_Condition_Blowing Dust'] == 1:
        accidents.loc[index, 'Weather_Condition_Dust'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Blowing Dust / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Dust'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Blowing Sand'] == 1:
        accidents.loc[index, 'Weather_Condition_Dust'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
    
    elif accidents.loc[index, 'Weather_Condition_Blowing Snow'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Blowing Snow / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Cloudy / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Cloudy'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Drizzle / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Drizzle and Fog'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Fog'] = 1

    elif accidents.loc[index, 'Weather_Condition_Fair / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Fair'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Fog / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Fog'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Freezing Rain / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
        accidents.loc[index, 'Weather_Condition_Rain'] = 1

    elif accidents.loc[index, 'Weather_Condition_Haze / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Smoke/Haze'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
    
    elif accidents.loc[index, 'Weather_Condition_Heavy Rain / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Heavy_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
    
    elif accidents.loc[index, 'Weather_Condition_Heavy Snow / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Heavy_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
            
    elif accidents.loc[index, 'Weather_Condition_Heavy Snow with Thunder'] == 1:
        accidents.loc[index, 'Weather_Condition_Heavy_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Thunder'] = 1

    elif accidents.loc[index, 'Weather_Condition_Heavy Snow/Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Heavy_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Heavy T-Storm / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
        accidents.loc[index, 'Weather_Condition_Thunderstorm'] = 1

    elif accidents.loc[index, 'Weather_Condition_Heavy Thunderstorms and Snow'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Thunderstorm'] = 1

    elif accidents.loc[index, 'Weather_Condition_Heavy Thunderstorms with Small Hail'] == 1:
        accidents.loc[index, 'Weather_Condition_Thunderstorm'] = 1
        accidents.loc[index, 'Weather_Condition_Hail'] = 1

    elif accidents.loc[index, 'Weather_Condition_Light Drizzle / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Light Freezing Rain / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
        accidents.loc[index, 'Weather_Condition_Light_Rain'] = 1
    
    elif accidents.loc[index, 'Weather_Condition_Light Rain / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
    
    elif accidents.loc[index, 'Weather_Condition_Light Rain Shower / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
    
    elif accidents.loc[index, 'Weather_Condition_Light Sleet / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Sleet'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
                    
    elif accidents.loc[index, 'Weather_Condition_Light Snow / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
                    
    elif accidents.loc[index, 'Weather_Condition_Light Snow and Sleet'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Sleet'] = 1
                    
    elif accidents.loc[index, 'Weather_Condition_Light Snow and Sleet / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
                    
    elif accidents.loc[index, 'Weather_Condition_Light Snow with Thunder'] == 1:
        accidents.loc[index, 'Weather_Condition_Light_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Thunder'] = 1

    elif accidents.loc[index, 'Weather_Condition_Mist / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Mist'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Mostly Cloudy / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Mostly_Cloudy'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Partly Cloudy / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Partly_Cloudy'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Patches of Fog / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Fog'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Rain / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Rain and Sleet'] == 1:
        accidents.loc[index, 'Weather_Condition_Rain'] = 1
        accidents.loc[index, 'Weather_Condition_Sleet'] = 1

    elif accidents.loc[index, 'Weather_Condition_Sand / Dust Whirls Nearby'] == 1:
        accidents.loc[index, 'Weather_Condition_Sand'] = 1
        accidents.loc[index, 'Weather_Condition_Dust_Whirls'] = 1

    elif accidents.loc[index, 'Weather_Condition_Sand / Dust Whirlwinds'] == 1:
        accidents.loc[index, 'Weather_Condition_Sand'] = 1
        accidents.loc[index, 'Weather_Condition_Dust_Whirls'] = 1

    elif accidents.loc[index, 'Weather_Condition_Sleet / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Sleet'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Smoke / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Smoke/Haze'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Snow / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
            
    elif accidents.loc[index, 'Weather_Condition_Snow and Sleet'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Sleet'] = 1

    elif accidents.loc[index, 'Weather_Condition_Snow and Sleet / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Sleet'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Snow/Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Snow'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Squalls / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Squalls'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_T-Storm / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Thunderstorm'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Thunder / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Thunder'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Thunder / Wintry Mix'] == 1:
        accidents.loc[index, 'Weather_Condition_Thunder'] = 1
        accidents.loc[index, 'Weather_Condition_Wintry_Mix'] = 1

    elif accidents.loc[index, 'Weather_Condition_Thunder / Wintry Mix / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Wintry_Mix'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Thunder and Hail'] == 1:
        accidents.loc[index, 'Weather_Condition_Thunder'] = 1
        accidents.loc[index, 'Weather_Condition_Hail'] = 1

    elif accidents.loc[index, 'Weather_Condition_Thunder and Hail / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Hail'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Thunderstorms/Snow'] == 1:
        accidents.loc[index, 'Weather_Condition_Thunderstorm'] = 1
        accidents.loc[index, 'Weather_Condition_Snow'] = 1

    elif accidents.loc[index, 'Weather_Condition_Widespread Dust / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Dust'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1

    elif accidents.loc[index, 'Weather_Condition_Wintry Mix / Windy'] == 1:
        accidents.loc[index, 'Weather_Condition_Wintry_Mix'] = 1
        accidents.loc[index, 'Weather_Condition_Wind'] = 1
            


KeyboardInterrupt: 

In [None]:
#Set columns to drop 
cols_to_drop = ['Weather_Condition_Smoke / Windy', 'Weather_Condition_Blowing Dust', 'Weather_Condition_Blowing Dust / Windy', 'Weather_Condition_Blowing Sand', 'Weather_Condition_Blowing Snow', 'Weather_Condition_Blowing Snow / Windy', 'Weather_Condition_Cloudy / Windy', 'Weather_Condition_Drizzle / Windy', 'Weather_Condition_Drizzle and Fog', 'Weather_Condition_Fair / Windy', 'Weather_Condition_Fog / Windy', 'Weather_Condition_Freezing Rain / Windy', 'Weather_Condition_Haze / Windy', 'Weather_Condition_Heavy Rain / Windy', 'Weather_Condition_Heavy Snow / Windy', 'Weather_Condition_Heavy Snow with Thunder', 'Weather_Condition_Heavy Snow/Windy', 'Weather_Condition_Heavy T-Storm / Windy', 'Weather_Condition_Heavy Thunderstorms and Snow', 'Weather_Condition_Heavy Thunderstorms with Small Hail', 'Weather_Condition_Light Drizzle / Windy', 'Weather_Condition_Light Freezing Rain / Windy', 'Weather_Condition_Light Rain / Windy', 'Weather_Condition_Light Rain Shower / Windy', 'Weather_Condition_Light Sleet / Windy', 'Weather_Condition_Light Snow / Windy', 'Weather_Condition_Light Snow and Sleet', 'Weather_Condition_Light Snow and Sleet / Windy', 'Weather_Condition_Light Snow with Thunder', 'Weather_Condition_Mist / Windy', 'Weather_Condition_Mostly Cloudy / Windy', 'Weather_Condition_Partly Cloudy / Windy', 'Weather_Condition_Patches of Fog / Windy', 'Weather_Condition_Rain and Sleet	Weather_Condition_Sand', 'Weather_Condition_Sand / Dust Whirls Nearby', 'Weather_Condition_Sand / Dust Whirlwinds', 'Weather_Condition_Sleet / Windy', 'Weather_Condition_Smoke / Windy', 'Weather_Condition_Snow / Windy', 'Weather_Condition_Snow and Sleet', 'Weather_Condition_Snow and Sleet / Windy', 'Weather_Condition_Snow/Windy', 'Weather_Condition_Squalls / Windy', 'Weather_Condition_T-Storm / Windy', 'Weather_Condition_Thunder / Windy', 'Weather_Condition_Thunder / Wintry Mix', 'Weather_Condition_Thunder / Wintry Mix / Windy', 'Weather_Condition_Thunder and Hail', 'Weather_Condition_Thunder and Hail / Windy', 'Weather_Condition_Thunderstorms/Snow', 'Weather_Condition_Widespread Dust / Windy', 'Weather_Condition_Wintry Mix / Windy']

#Drop the columns
accidents = accidents.drop(columns = cols_to_drop, axis=1)

In [None]:
def allDone():
   display(Audio(url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav', autoplay=True))
## Insert whatever audio file you want above


In [None]:
print(accidents['Weather_Condition_Wind'].value_counts())
print(accidents['Weather_Condition_Smoke/Haze'].value_counts())
print(accidents['Weather_Condition_Snow'].value_counts())
print(accidents['Weather_Condition_Light_Snow'].value_counts())
print(accidents['Weather_Condition_Heavy_Snow'].value_counts())
print(accidents['Weather_Condition_Rain'].value_counts())
print(accidents['Weather_Condition_Light_Rain'].value_counts())
print(accidents['Weather_Condition_Heavy_Rain'].value_counts())
print(accidents['Weather_Condition_Hail'].value_counts())
print(accidents['Weather_Condition_Sleet'].value_counts())
print(accidents['Weather_Condition_Cloudy'].value_counts())
print(accidents['Weather_Condition_Partly_Cloudy'].value_counts())
print(accidents['Weather_Condition_Mostly_Cloudy'].value_counts())
print(accidents['Weather_Condition_Funnel_Cloud'].value_counts())
print(accidents['Weather_Condition_Wintry_Mix'].value_counts())
print(accidents['Weather_Condition_Thunderstorm'].value_counts())

In [45]:
accidents

Unnamed: 0,Severity,Side,City,County,State,Zipcode,Temperature(F),Humidity(%),Pressure(in),Visibility(mi),Wind_Direction,Wind_Speed(mph),Precipitation(in),Day/Night,Year,Month,Day,Hour,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Weather_Condition_Blowing Dust,Weather_Condition_Blowing Dust / Windy,Weather_Condition_Blowing Sand,Weather_Condition_Blowing Snow,Weather_Condition_Blowing Snow / Windy,Weather_Condition_Cloudy,Weather_Condition_Cloudy / Windy,Weather_Condition_Drizzle / Windy,Weather_Condition_Drizzle and Fog,Weather_Condition_Dust,Weather_Condition_Dust Whirls,Weather_Condition_Fair,Weather_Condition_Fair / Windy,Weather_Condition_Fog,Weather_Condition_Fog / Windy,Weather_Condition_Freezing Rain / Windy,Weather_Condition_Funnel Cloud,Weather_Condition_Hail,Weather_Condition_Haze / Windy,Weather_Condition_Heavy Rain,Weather_Condition_Heavy Rain / Windy,Weather_Condition_Heavy Snow,Weather_Condition_Heavy Snow / Windy,Weather_Condition_Heavy Snow with Thunder,Weather_Condition_Heavy Snow/Windy,Weather_Condition_Heavy T-Storm / Windy,Weather_Condition_Heavy Thunderstorms and Snow,Weather_Condition_Heavy Thunderstorms with Small Hail,Weather_Condition_Light Drizzle / Windy,Weather_Condition_Light Freezing Rain / Windy,Weather_Condition_Light Rain,Weather_Condition_Light Rain / Windy,Weather_Condition_Light Rain Shower / Windy,Weather_Condition_Light Sleet / Windy,Weather_Condition_Light Snow,Weather_Condition_Light Snow / Windy,Weather_Condition_Light Snow and Sleet,Weather_Condition_Light Snow and Sleet / Windy,Weather_Condition_Light Snow with Thunder,Weather_Condition_Mist,Weather_Condition_Mist / Windy,Weather_Condition_Mostly Cloudy,Weather_Condition_Mostly Cloudy / Windy,Weather_Condition_Partly Cloudy,Weather_Condition_Partly Cloudy / Windy,Weather_Condition_Patches of Fog / Windy,Weather_Condition_Rain,Weather_Condition_Rain / Windy,Weather_Condition_Rain and Sleet,Weather_Condition_Sand / Dust Whirls Nearby,Weather_Condition_Sand / Dust Whirlwinds,Weather_Condition_Shallow Fog,Weather_Condition_Sleet,Weather_Condition_Sleet / Windy,Weather_Condition_Smoke / Windy,Weather_Condition_Smoke/Haze,Weather_Condition_Snow,Weather_Condition_Snow / Windy,Weather_Condition_Snow and Sleet,Weather_Condition_Snow and Sleet / Windy,Weather_Condition_Snow/Windy,Weather_Condition_Squalls,Weather_Condition_Squalls / Windy,Weather_Condition_T-Storm / Windy,Weather_Condition_Thunder,Weather_Condition_Thunder / Windy,Weather_Condition_Thunder / Wintry Mix,Weather_Condition_Thunder / Wintry Mix / Windy,Weather_Condition_Thunder and Hail,Weather_Condition_Thunder and Hail / Windy,Weather_Condition_Thunderstorm,Weather_Condition_Thunderstorms/Snow,Weather_Condition_Unkown,Weather_Condition_Volcanic Ash,Weather_Condition_Widespread Dust / Windy,Weather_Condition_Wintry Mix,Weather_Condition_Wintry Mix / Windy,Weather_Condition_Windy
0,2,0,"Greenville,SC","Greenville,SC",SC,29607,76.0,50-59%,28.91,10.0,N,7.0,0.0,Day,2019,5,21,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,"Charlotte,NC","Mecklenburg,NC",NC,28270,76.0,60-69%,29.3,10.0,Variable,3.0,0.0,Day,2019,10,7,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,"Los Gatos,CA","Santa Clara,CA",CA,95033,51.0,70-79%,30.17,10.0,W,6.0,0.0,Night,2020,12,13,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2,0,"Carson City,NV","Douglas,NV",NV,89705,53.6,10-19%,30.16,10.0,SW,4.6,0.0,Day,2018,4,17,16,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,0,"Fort Lauderdale,FL","Broward,FL",FL,33324,84.2,80-89%,29.92,10.0,SE,13.8,0.0,Day,2016,8,31,17,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2906605,2,1,"Houston,TX","Harris,TX",TX,77018,84.2,60-69%,30.02,9.0,Variable,5.8,0.0,Day,2018,6,28,8,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2906606,2,0,"Colton,CA","San Bernardino,CA",CA,92324,46.9,70-79%,30.14,10.0,Calm,0.0,0.0,Night,2019,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2906607,2,1,"Miami,FL","Miami-Dade,FL",FL,33173,76.0,80-89%,30.0,10.0,NW,16.0,0.0,Day,2020,11,23,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2906608,2,0,"Salt Lake City,UT","Salt Lake,UT",UT,84129,27.0,80-89%,25.81,10.0,SE,8.0,0.0,Night,2019,12,29,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [44]:
print(accidents['Weather_Condition_Smoke / Windy'].value_counts())
print(accidents['Weather_Condition_Windy'].value_counts())
print(accidents['Weather_Condition_Smoke/Haze'].value_counts())


0    2906006
1         59
Name: Weather_Condition_Smoke / Windy, dtype: int64
0    2905803
1        262
Name: Weather_Condition_Windy, dtype: int64
0    2866973
1      39092
Name: Weather_Condition_Smoke/Haze, dtype: int64


In [71]:
mask = accidents2['Weather_Condition_Smoke / Windy'] == 1
accidents2['Weather_Condition_Windy'] = np.where(mask, 1, 0)
accidents2['Weather_Condition_Smoke/Haze'] = np.where(mask, 1, 0)
#accidents2 = accidents2.drop(columns = 'Weather_Condition_Smoke / Windy', axis=1)

In [39]:
#Creating For Loop to expand multi-factor columns into the factors
for x in accidents2:
     if x['Weather_Condition_Smoke / Windy'] == 1: 
         x['Weather_Condition_Smoke/Haze'].replace(0, 1, inplace=True)
         x['Weather_Condition_Windy'].replace(0, 1, inplace=True)
   
#accidents2 = accidents2.drop(['Weather_Condition_Smoke / Windy'], axis=1)

        

TypeError: string indices must be integers

In [186]:
pd.options.display.max_rows = None
print(accidents2.dtypes)
pd.options.display.max_rows = 20

Severity                                                    int64
Side                                                       object
City                                                       object
County                                                     object
State                                                      object
Zipcode                                                    object
Temperature(F)                                             object
Humidity(%)                                              category
Pressure(in)                                               object
Visibility(mi)                                            float64
Wind_Direction                                             object
Wind_Speed(mph)                                           float64
Precipitation(in)                                         float64
Day/Night                                                  object
Year                                                        int64
Month     

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [139]:
accidents2['Weather_Condition_Smoke / Windy'].value_counts()

0    2906006
1         59
Name: Weather_Condition_Smoke / Windy, dtype: int64

In [96]:
obj_cols = accidents.dtypes[accidents.dtypes == 'object'].index.tolist()
obj_cols

['Side',
 'City',
 'County',
 'State',
 'Zipcode',
 'Temperature(F)',
 'Pressure(in)',
 'Wind_Direction',
 'Weather_Condition',
 'Day/Night']

In [12]:
enc = OneHotEncoder(sparse=False)

In [97]:
pd.options.display.max_rows = None
print(accidents.dtypes)
pd.options.display.max_rows = 20

Severity                int64
Side                   object
City                   object
County                 object
State                  object
Zipcode                object
Temperature(F)         object
Humidity(%)          category
Pressure(in)           object
Visibility(mi)        float64
Wind_Direction         object
Wind_Speed(mph)       float64
Precipitation(in)     float64
Weather_Condition      object
Day/Night              object
Year                    int64
Month                   int64
Day                     int64
Hour                    int64
Bump                    int64
Crossing                int64
Give_Way                int64
Junction                int64
No_Exit                 int64
Railway                 int64
Roundabout              int64
Station                 int64
Stop                    int64
Traffic_Calming         int64
Traffic_Signal          int64
Turning_Loop            int64
dtype: object


In [177]:
accidents

Unnamed: 0,Severity,Side,City,County,State,Zipcode,Temperature(F),Humidity(%),Pressure(in),Visibility(mi),Wind_Direction,Wind_Speed(mph),Precipitation(in),Weather_Condition,Day/Night,Year,Month,Day,Hour,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop
0,2,0,"Greenville,SC","Greenville,SC",SC,29607,76.0,50-59%,28.91,10.0,N,7.0,0.0,Fair,Day,2019,5,21,8,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,"Charlotte,NC","Mecklenburg,NC",NC,28270,76.0,60-69%,29.3,10.0,Variable,3.0,0.0,Cloudy,Day,2019,10,7,17,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,"Los Gatos,CA","Santa Clara,CA",CA,95033,51.0,70-79%,30.17,10.0,W,6.0,0.0,Fair,Night,2020,12,13,21,0,0,0,0,0,0,0,0,0,0,0,0
3,2,0,"Carson City,NV","Douglas,NV",NV,89705,53.6,10-19%,30.16,10.0,SW,4.6,0.0,Fair,Day,2018,4,17,16,0,0,0,0,0,0,0,0,0,0,1,0
4,3,0,"Fort Lauderdale,FL","Broward,FL",FL,33324,84.2,80-89%,29.92,10.0,SE,13.8,0.0,Mostly Cloudy,Day,2016,8,31,17,0,0,0,1,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2906605,2,1,"Houston,TX","Harris,TX",TX,77018,84.2,60-69%,30.02,9.0,Variable,5.8,0.0,Fair,Day,2018,6,28,8,0,0,0,0,0,0,0,0,1,0,0,0
2906606,2,0,"Colton,CA","San Bernardino,CA",CA,92324,46.9,70-79%,30.14,10.0,Calm,0.0,0.0,Fair,Night,2019,1,10,2,0,0,0,0,0,0,0,0,0,0,0,0
2906607,2,1,"Miami,FL","Miami-Dade,FL",FL,33173,76.0,80-89%,30.0,10.0,NW,16.0,0.0,Mostly Cloudy,Day,2020,11,23,12,0,0,0,0,0,0,0,0,0,0,0,0
2906608,2,0,"Salt Lake City,UT","Salt Lake,UT",UT,84129,27.0,80-89%,25.81,10.0,SE,8.0,0.0,Cloudy,Night,2019,12,29,22,0,0,0,0,0,0,0,0,0,0,0,0
