In [1]:
#Dependencies
import pandas as pd
from sqlalchemy import create_engine

# Load the raw EV station export into a DataFrame.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype guesses on inconsistent columns.
df = pd.read_csv(
    filepath_or_buffer='Resources/ev_stations_v1.csv',
    low_memory=False,
)
# Uncomment to inspect the raw frame:
# df.columns
# df.head()

In [2]:
# Remove unnecessary columns: keep only the station name, address fields,
# access code, network, connector types and coordinates.
stations = df[[
    'Station Name', 'Street Address', 'City', 'State', 'ZIP',
    'Access Code', 'EV Network', 'EV Connector Types',
    'Latitude', 'Longitude',
]]

# Uncomment to preview the result: stations.head()
# Display (rows, columns) of the trimmed frame.
stations.shape

(50289, 10)

In [3]:
# Drop every row that has a missing value in any of the kept columns.
# axis=0 / how='any' are the pandas defaults, spelled out for the reader.
stations = stations.dropna(axis=0, how='any')

# Uncomment to preview the result: stations.head()
# Display (rows, columns) once the incomplete rows are gone.
stations.shape

(50239, 10)

In [4]:
# Keep only stations in the US (including Washington DC and Puerto Rico)
# by removing the rows whose state code is 'ON' (Ontario).
stations = stations.loc[stations['State'].ne('ON')]

# For reference, stations['State'].unique() *before* this filter returned:
# array(['CA', 'VT', 'WA', 'OR', 'IL', 'ID', 'TX', 'TN', 'NY', 'FL', 'WI',
#        'IA', 'MN', 'AR', 'AZ', 'CT', 'MD', 'NJ', 'SC', 'MI', 'OH', 'WV',
#        'MO', 'UT', 'KS', 'MA', 'CO', 'IN', 'LA', 'NC', 'NH', 'VA', 'AL',
#        'GA', 'HI', 'NV', 'DC', 'PA', 'RI', 'OK', 'ME', 'KY', 'NE', 'MS',
#        'SD', 'DE', 'NM', 'MT', 'ND', 'WY', 'AK', 'PR', 'ON'], dtype=object)

# Count the distinct state codes that remain (dropna=False mirrors
# len(Series.unique()), which would also count a NaN entry).
stations['State'].nunique(dropna=False)

52

In [5]:
# Map the source column headers to short snake_case names.
rename = dict([
    ('Station Name', 'station_name'),
    ('Street Address', 'address'),
    ('City', 'city'),
    ('State', 'state'),
    ('ZIP', 'zip'),
    ('Access Code', 'access'),
    ('EV Network', 'ev_network'),
    ('EV Connector Types', 'connector_type'),
    ('Latitude', 'lat'),
    ('Longitude', 'lng'),
])

# Apply the mapping to the column labels (returns a new frame).
stations = stations.rename(columns=rename)

# Uncomment to preview the result: stations.head()
# Display the new column labels.
stations.columns

Index(['station_name', 'address', 'city', 'state', 'zip', 'access',
       'ev_network', 'connector_type', 'lat', 'lng'],
      dtype='object')

In [6]:
# Renumber the rows after the filtering steps.
# drop=False is the pandas default: the previous index is kept as a new
# 'index' column, which a later cell removes.
stations = stations.reset_index(drop=False)

# Uncomment to preview the result: stations.head()

In [7]:
# Remove the extra 'index' column left behind by reset_index().
# drop(..., errors='ignore') returns a new frame and does nothing when the
# column is already gone, so re-running this cell no longer raises KeyError
# the way `del stations['index']` does.
stations = stations.drop(columns=['index'], errors='ignore')

# Uncomment to preview the result: stations.head()

In [8]:
# Create the SQLAlchemy engine for the SQLite database file
# (echo=False keeps SQL statement logging off).
engine = create_engine('sqlite:///Resources/evstations.sqlite', echo=False)

# Export the cleaned stations to the 'evstations' table.
# if_exists='replace' drops and recreates the table on every run, so
# re-running the notebook no longer appends a duplicate copy of every row
# (which if_exists='append' did).
# index=True stores the DataFrame index as a column alongside the data.
stations.to_sql(name='evstations', con=engine, if_exists='replace', index=True)

In [9]:
# Also save the cleaned data as a CSV file. index=True is the pandas
# default: the DataFrame index is written as the first, unnamed column.
stations.to_csv(path_or_buf='Resources/evstations_clean.csv', index=True)

In [10]:
# Final check: display the first five rows of the cleaned data.
stations.head(n=5)

Unnamed: 0,station_name,address,city,state,zip,access,ev_network,connector_type,lat,lng
0,LADWP - Truesdale Center,11797 Truesdale St,Sun Valley,CA,91352,private,Non-Networked,CHADEMO J1772 J1772COMBO,34.248319,-118.387971
1,LADWP - West LA District Office,1394 S Sepulveda Blvd,Los Angeles,CA,90024,private,Non-Networked,J1772,34.052542,-118.448504
2,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,public,Non-Networked,J1772,34.040539,-118.271387
3,LADWP - John Ferraro Building,111 N Hope St,Los Angeles,CA,90012,private,Non-Networked,CHADEMO J1772 J1772COMBO,34.059133,-118.248589
4,LADWP - Haynes Power Plant,6801 E 2nd St,Long Beach,CA,90803,private,Non-Networked,CHADEMO J1772 J1772COMBO,33.759802,-118.096665
