In [1]:
# data science
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import datetime

# API
import requests
import json

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func
import plotly.express as px

In [2]:
# Load the raw supercharger listing from the CSV sitting next to the notebook.
# The 'unicode_escape' codec is kept so the file decodes exactly as before.
source_csv = 'Supercharge Locations.csv'
raw_data = pd.read_csv(source_csv, encoding='unicode_escape')

# Preview up to the first 50 rows
raw_data.head(50)

Unnamed: 0,Supercharger,Street Address,City,State,Zip,Country,Stalls,kW,GPS,Elev(m),Open Date,Unnamed: 11
0,"Buellton, CA",555 McMurray Rd,Buellton,CA,93427,USA,10,150.0,"34.61456, -120.188387",114,7/13/2013,
1,"Corning, CA",950 Hwy 99,Corning,CA,96021,USA,6,150.0,"39.926454, -122.198393",87,10/18/2013,
2,"Barstow, CA",2812 Lenwood Rd,Barstow,CA,92311,USA,16,150.0,"34.849129, -117.085446",725,11/19/2012,
3,"Tifton, GA",1310 U.S. 82,Tifton,GA,31794,USA,8,150.0,"31.448847, -83.53221",92,7/10/2014,
4,"Roseville, CA",1151 Galleria Blvd,Roseville,CA,95678,USA,7,150.0,"38.771208, -121.266149",66,4/29/2014,
5,"Oxnard, CA",2751 Park View Ct,Oxnard,CA,93036,USA,20,150.0,"34.238654, -119.177398",24,9/24/2014,
6,"Daytona Beach - Gateway North Dr, FL",2330 Gateway N Dr,Daytona Beach,FL,32117,USA,12,250.0,"29.223265, -81.09972",7,11/19/2022,
7,"Port St. Lucie, FL",1701 NW Peacock Blvd,Port St. Lucie,FL,34986,USA,16,150.0,"27.313023, -80.406688",8,7/25/2013,
8,"Dallas - Park Ln, TX",8080 Park Ln,Dallas,TX,75231,USA,16,250.0,"32.867639, -96.767245",179,11/17/2021,
9,"Dangjin, South Korea","63, Songak-ro, Songak-eup, Dangjin-si, Chungch...",Dangjin,Chungcheongnam,,South Korea,4,120.0,"36.909503, 126.69407466",42,12/20/2019,


In [3]:
# Show column names, non-null counts and dtypes of the freshly loaded frame
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5876 entries, 0 to 5875
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Supercharger    5876 non-null   object 
 1   Street Address  5876 non-null   object 
 2   City            5876 non-null   object 
 3   State           5754 non-null   object 
 4   Zip             3947 non-null   object 
 5   Country         5876 non-null   object 
 6   Stalls          5876 non-null   int64  
 7   kW              5870 non-null   float64
 8   GPS             5876 non-null   object 
 9   Elev(m)         5876 non-null   int64  
 10  Open Date       5126 non-null   object 
 11  Unnamed: 11     0 non-null      float64
dtypes: float64(2), int64(2), object(8)
memory usage: 551.0+ KB


In [4]:
# GPS holds "lat, lon" as a single string: split on the comma into two new
# columns, then trim the whitespace left around each piece.
gps_parts = raw_data['GPS'].str.split(',', expand=True)
raw_data[['Latitude', 'Longitude']] = gps_parts
for coord_col in ('Latitude', 'Longitude'):
    raw_data[coord_col] = raw_data[coord_col].str.strip()

# The frame should now list the two extra (still string-typed) columns
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5876 entries, 0 to 5875
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Supercharger    5876 non-null   object 
 1   Street Address  5876 non-null   object 
 2   City            5876 non-null   object 
 3   State           5754 non-null   object 
 4   Zip             3947 non-null   object 
 5   Country         5876 non-null   object 
 6   Stalls          5876 non-null   int64  
 7   kW              5870 non-null   float64
 8   GPS             5876 non-null   object 
 9   Elev(m)         5876 non-null   int64  
 10  Open Date       5126 non-null   object 
 11  Unnamed: 11     0 non-null      float64
 12  Latitude        5876 non-null   object 
 13  Longitude       5876 non-null   object 
dtypes: float64(2), int64(2), object(10)
memory usage: 642.8+ KB


In [5]:
# Drop columns that are not carried forward: 'Zip' is only partially populated,
# 'GPS' has already been split into Latitude/Longitude, and 'Unnamed: 11' has
# no non-null values at all.
# `columns=` already selects the column axis, so the redundant `axis=1` is omitted.
raw_data = raw_data.drop(columns=['Zip', 'GPS', 'Unnamed: 11'])
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5876 entries, 0 to 5875
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Supercharger    5876 non-null   object 
 1   Street Address  5876 non-null   object 
 2   City            5876 non-null   object 
 3   State           5754 non-null   object 
 4   Country         5876 non-null   object 
 5   Stalls          5876 non-null   int64  
 6   kW              5870 non-null   float64
 7   Elev(m)         5876 non-null   int64  
 8   Open Date       5126 non-null   object 
 9   Latitude        5876 non-null   object 
 10  Longitude       5876 non-null   object 
dtypes: float64(1), int64(2), object(8)
memory usage: 505.1+ KB


In [6]:
# Give every column its working dtype.
# Open Date: text -> datetime64 (missing dates become NaT)
raw_data['Open Date'] = pd.to_datetime(raw_data['Open Date'])

# Numeric columns: counts/elevation as integers, power and coordinates as floats
target_dtypes = {
    'Stalls': int,
    'Elev(m)': int,
    'kW': float,
    'Latitude': float,
    'Longitude': float,
}
for col_name, col_dtype in target_dtypes.items():
    raw_data[col_name] = raw_data[col_name].astype(col_dtype)

raw_data.head(50)

Unnamed: 0,Supercharger,Street Address,City,State,Country,Stalls,kW,Elev(m),Open Date,Latitude,Longitude
0,"Buellton, CA",555 McMurray Rd,Buellton,CA,USA,10,150.0,114,2013-07-13,34.61456,-120.188387
1,"Corning, CA",950 Hwy 99,Corning,CA,USA,6,150.0,87,2013-10-18,39.926454,-122.198393
2,"Barstow, CA",2812 Lenwood Rd,Barstow,CA,USA,16,150.0,725,2012-11-19,34.849129,-117.085446
3,"Tifton, GA",1310 U.S. 82,Tifton,GA,USA,8,150.0,92,2014-07-10,31.448847,-83.53221
4,"Roseville, CA",1151 Galleria Blvd,Roseville,CA,USA,7,150.0,66,2014-04-29,38.771208,-121.266149
5,"Oxnard, CA",2751 Park View Ct,Oxnard,CA,USA,20,150.0,24,2014-09-24,34.238654,-119.177398
6,"Daytona Beach - Gateway North Dr, FL",2330 Gateway N Dr,Daytona Beach,FL,USA,12,250.0,7,2022-11-19,29.223265,-81.09972
7,"Port St. Lucie, FL",1701 NW Peacock Blvd,Port St. Lucie,FL,USA,16,150.0,8,2013-07-25,27.313023,-80.406688
8,"Dallas - Park Ln, TX",8080 Park Ln,Dallas,TX,USA,16,250.0,179,2021-11-17,32.867639,-96.767245
9,"Dangjin, South Korea","63, Songak-ro, Songak-eup, Dangjin-si, Chungch...",Dangjin,Chungcheongnam,South Korea,4,120.0,42,2019-12-20,36.909503,126.694075


In [7]:
# Re-check dtypes and non-null counts after the type conversions
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5876 entries, 0 to 5875
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Supercharger    5876 non-null   object        
 1   Street Address  5876 non-null   object        
 2   City            5876 non-null   object        
 3   State           5754 non-null   object        
 4   Country         5876 non-null   object        
 5   Stalls          5876 non-null   int32         
 6   kW              5870 non-null   float64       
 7   Elev(m)         5876 non-null   int32         
 8   Open Date       5126 non-null   datetime64[ns]
 9   Latitude        5876 non-null   float64       
 10  Longitude       5876 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int32(2), object(5)
memory usage: 459.2+ KB


In [8]:
# Keep only fully populated rows: a missing value in any column removes the row.
# The result is bound to a new name, so raw_data itself is left untouched.
cleaned_data = raw_data.dropna(axis=0, how='any')
cleaned_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5026 entries, 0 to 5125
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Supercharger    5026 non-null   object        
 1   Street Address  5026 non-null   object        
 2   City            5026 non-null   object        
 3   State           5026 non-null   object        
 4   Country         5026 non-null   object        
 5   Stalls          5026 non-null   int32         
 6   kW              5026 non-null   float64       
 7   Elev(m)         5026 non-null   int32         
 8   Open Date       5026 non-null   datetime64[ns]
 9   Latitude        5026 non-null   float64       
 10  Longitude       5026 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int32(2), object(5)
memory usage: 431.9+ KB


In [9]:
# Preview up to the first 50 rows of the NaN-free frame
cleaned_data.head(50)

Unnamed: 0,Supercharger,Street Address,City,State,Country,Stalls,kW,Elev(m),Open Date,Latitude,Longitude
0,"Buellton, CA",555 McMurray Rd,Buellton,CA,USA,10,150.0,114,2013-07-13,34.61456,-120.188387
1,"Corning, CA",950 Hwy 99,Corning,CA,USA,6,150.0,87,2013-10-18,39.926454,-122.198393
2,"Barstow, CA",2812 Lenwood Rd,Barstow,CA,USA,16,150.0,725,2012-11-19,34.849129,-117.085446
3,"Tifton, GA",1310 U.S. 82,Tifton,GA,USA,8,150.0,92,2014-07-10,31.448847,-83.53221
4,"Roseville, CA",1151 Galleria Blvd,Roseville,CA,USA,7,150.0,66,2014-04-29,38.771208,-121.266149
5,"Oxnard, CA",2751 Park View Ct,Oxnard,CA,USA,20,150.0,24,2014-09-24,34.238654,-119.177398
6,"Daytona Beach - Gateway North Dr, FL",2330 Gateway N Dr,Daytona Beach,FL,USA,12,250.0,7,2022-11-19,29.223265,-81.09972
7,"Port St. Lucie, FL",1701 NW Peacock Blvd,Port St. Lucie,FL,USA,16,150.0,8,2013-07-25,27.313023,-80.406688
8,"Dallas - Park Ln, TX",8080 Park Ln,Dallas,TX,USA,16,250.0,179,2021-11-17,32.867639,-96.767245
9,"Dangjin, South Korea","63, Songak-ro, Songak-eup, Dangjin-si, Chungch...",Dangjin,Chungcheongnam,South Korea,4,120.0,42,2019-12-20,36.909503,126.694075


In [10]:
# Summary statistics (count, mean, min, quartiles, max, std) for the numeric and datetime columns
cleaned_data.describe()

Unnamed: 0,Stalls,kW,Elev(m),Open Date,Latitude,Longitude
count,5026.0,5026.0,5026.0,5026,5026.0,5026.0
mean,9.164743,203.28651,229.49224,2020-07-06 18:53:26.048547328,37.025383,11.168604
min,2.0,72.0,-11.0,2012-11-19 00:00:00,-44.354408,-157.941632
25%,6.0,150.0,13.0,2018-10-12 00:00:00,30.703741,-82.48562
50%,8.0,250.0,67.0,2021-03-26 00:00:00,37.457994,8.561589
75%,12.0,250.0,249.0,2022-09-22 00:00:00,43.616725,114.78766
max,80.0,250.0,4338.0,2023-06-03 00:00:00,70.999582,174.840171
std,5.844308,59.426278,423.426783,,12.514649,96.161505


In [11]:
# SQLAlchemy engine for the SQLite database file Tesla.sqlite (relative path)
engine = create_engine("sqlite:///Tesla.sqlite")

In [12]:
# Write the cleaned frame to cleaned_data.csv; index=False leaves the row index out of the file
cleaned_data.to_csv("cleaned_data.csv", index=False)

In [None]:
# Rename the columns whose labels contain spaces or parentheses so they match
# the column names used by the supercharge_locations table schema.
# (The original cell was cut off mid-statement and raised a SyntaxError.)
# rename() ignores labels that are not present, so running this more than once is harmless.
cleaned_data = cleaned_data.rename(
    columns={
        "Street Address": "Street_Address",
        "Elev(m)": "Elev_m",
        "Open Date": "Open_Date",
    }
)

In [18]:
# Schema for the cleaned supercharger data. `id` is an INTEGER PRIMARY KEY, so
# SQLite fills it in automatically for rows inserted without an id value.
# IF NOT EXISTS keeps the cell re-runnable against an existing database file
# instead of failing with "table already exists".
query = """CREATE TABLE IF NOT EXISTS supercharge_locations (
    id INTEGER PRIMARY KEY,
    Supercharger TEXT,
    Street_Address TEXT,
    City TEXT,
    State TEXT,
    Country TEXT,
    Stalls INTEGER,
    kW REAL,
    Elev_m INTEGER,
    Open_Date TEXT,
    Latitude REAL,
    Longitude REAL
)"""

# engine.begin() opens a transaction and commits it when the block exits
# without an error, so the DDL is persisted explicitly rather than relying on
# the driver's implicit autocommit behaviour.
with engine.begin() as con:
    con.execute(text(query))





In [23]:
# Map the labels containing spaces/parentheses onto the names used in the SQL schema
sql_column_names = {
    "Street Address": "Street_Address",
    "Elev(m)": "Elev_m",
    "Open Date": "Open_Date",
}
cleaned_data = cleaned_data.rename(columns=sql_column_names)

In [24]:
# Display the current column labels of the cleaned frame
cleaned_data.columns

Index(['Supercharger', 'Street_Address', 'City', 'State', 'Country', 'Stalls',
       'kW', 'Elev_m', 'Open_Date', 'Latitude', 'Longitude'],
      dtype='object')

In [25]:
# Append the cleaned rows to the existing supercharge_locations table.
# method="multi" packs many rows into one INSERT, and every cell becomes a bound
# parameter. Without a chunksize the whole frame goes out as a single statement,
# which can exceed SQLite's per-statement variable limit on stock builds
# (999 before SQLite 3.32), so the rows per INSERT are capped from the column count.
# NOTE: if_exists="append" adds the rows again every time this cell is re-run.
SQLITE_MAX_VARIABLES = 999
rows_per_insert = max(1, SQLITE_MAX_VARIABLES // len(cleaned_data.columns))
cleaned_data.to_sql(
    "supercharge_locations",
    con=engine,
    index=False,
    method="multi",
    if_exists="append",
    chunksize=rows_per_insert,
)

5026

In [26]:
# Confirm what actually exists in the database by reflecting it through an inspector.
inspector_gadget = inspect(engine)

# Names of every table the inspector reports for this engine
tables = inspector_gadget.get_table_names()

# For each table: its name, a separator line, then one "name type" line per column
for table_name in tables:
    print(table_name)
    print("-----------")

    for col_info in inspector_gadget.get_columns(table_name):
        print(col_info["name"], col_info["type"])

    # blank line between tables
    print()

supercharge_locations
-----------
id INTEGER
Supercharger TEXT
Street_Address TEXT
City TEXT
State TEXT
Country TEXT
Stalls INTEGER
kW REAL
Elev_m INTEGER
Open_Date TEXT
Latitude REAL
Longitude REAL



In [None]:
# Close the engine's pooled database connections now that the load is finished
engine.dispose()