In [1]:
# Import Dependencies
import datetime
import getpass
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.orm import Session

import mysql_conn

In [2]:
# Load the SmartWay vehicle-list workbooks for model years 2016, 2017 and 2018.
fuel_2016_path = 'Resources/SmartWay Vehicle List for MY 2016.xlsx'
fuel_2017_path = 'Resources/SmartWay Vehicle List for MY 2017.xlsx'
fuel_2018_path = 'Resources/SmartWay Vehicle List for MY 2018.xlsx'

# One raw DataFrame per workbook, read in year order.
all_fuel_2016_df, all_fuel_2017_df, all_fuel_2018_df = [
    pd.read_excel(workbook_path)
    for workbook_path in (fuel_2016_path, fuel_2017_path, fuel_2018_path)
]

In [3]:
# Preview the first rows of the raw 2016 sheet to check which columns were read.
all_fuel_2016_df.head()

Unnamed: 0,Model,Displ,Cyl,Trans,Drive,Fuel,Sales Area,Stnd,Stnd Description,Underhood ID,Veh Class,Smog Rating,City MPG,Hwy MPG,Cmb MPG,Greenhouse Gas Score,SmartWay
0,ACURA ILX,2.4,4.0,AutoMan-8,2WD,Gasoline,CA,L3ULEV125,California LEV-III ULEV125,GHNXV02.4XH3,small car,6,25,36,29,7,Yes
1,ACURA RLX,3.5,6.0,AutoMan-7,4WD,Gasoline,FA,B3,Federal Tier 2 Bin 3,GHNXV03.5PK3,midsize car,7,28,32,30,7,Yes
2,ACURA RLX,3.5,6.0,AutoMan-7,4WD,Gasoline,CA,L3SULEV30,California LEV-III SULEV30,GHNXV03.5PK3,midsize car,8,28,32,30,7,Yes
3,ACURA TLX,2.4,4.0,AMS-8,2WD,Gasoline,CA,U2,California LEV-II ULEV,GHNXJ02.41A3,small car,6,24,35,28,7,Yes
4,AUDI A3 Cabriolet,1.8,4.0,AMS-6,2WD,Gasoline,FA,B2,Federal Tier 2 Bin 2,GVGAV02.0APA,small car,9,24,35,28,7,Yes


In [4]:
def manage_df(all_fuel_df,year):
    """Trim a raw vehicle-list sheet to the columns kept downstream and tag it with a year.

    Parameters
    ----------
    all_fuel_df : pandas.DataFrame
        Raw sheet. It must contain every column named in ``unused_columns``;
        ``drop`` raises ``KeyError`` otherwise.
    year : int
        Value written to every row of the new ``year`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with the unused columns
        removed, the remaining columns renamed, the index labels converted
        to ``str`` and a trailing ``year`` column.
    """
    # Engine / emission-standard details that are not kept.
    unused_columns = ['Displ', 'Cyl', 'Trans', 'Drive', 'Sales Area', 'Stnd',
                      'Stnd Description', 'Underhood ID']
    # Spreadsheet header -> column name used in the rest of the notebook.
    column_names = {
        'Model': 'model',
        'Veh Class': 'vehicle_class',
        'Fuel': 'fuel_type',
        'Smog Rating': 'smog_rating',
        'City MPG': 'city_mpg',
        'Hwy MPG': 'hwy_mpg',
        'Cmb MPG': 'cmb_mpg',
        'Greenhouse Gas Score': 'Greenhouse_gas_score',
        'SmartWay': 'smartway',
    }

    fuel_df = (
        all_fuel_df
        .drop(columns=unused_columns)
        .rename(columns=column_names, index=str)
    )
    fuel_df['year'] = year
    return fuel_df

In [5]:
# Trim and rename each yearly sheet, stamping it with its model year.
fuel_2016_df, fuel_2017_df, fuel_2018_df = [
    manage_df(raw_df, model_year)
    for raw_df, model_year in (
        (all_fuel_2016_df, 2016),
        (all_fuel_2017_df, 2017),
        (all_fuel_2018_df, 2018),
    )
]

In [6]:
# Check the dtype of every column; the three *_mpg columns come back as
# `object`, which is why they are converted to floats further down.
fuel_2016_df.dtypes

model                   object
fuel_type               object
vehicle_class           object
smog_rating              int64
city_mpg                object
hwy_mpg                 object
cmb_mpg                 object
Greenhouse_gas_score     int64
smartway                object
year                     int64
dtype: object

In [7]:
# Preview the trimmed / renamed 2016 frame.
fuel_2016_df.head()

Unnamed: 0,model,fuel_type,vehicle_class,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway,year
0,ACURA ILX,Gasoline,small car,6,25,36,29,7,Yes,2016
1,ACURA RLX,Gasoline,midsize car,7,28,32,30,7,Yes,2016
2,ACURA RLX,Gasoline,midsize car,8,28,32,30,7,Yes,2016
3,ACURA TLX,Gasoline,small car,6,24,35,28,7,Yes,2016
4,AUDI A3 Cabriolet,Gasoline,small car,9,24,35,28,7,Yes,2016


model VARCHAR(50),
vehicle_class VARCHAR(50),
fuel_type VARCHAR(50),
smog_rating  INT,
city_mpg   FLOAT,
hwy_mpg   FLOAT,
cmb_mpg  FLOAT,
Greenhouse_gas_score  INT,
smartway  VARCHAR(50)

In [8]:
def average(number1, number2):
    """Return the arithmetic mean of two values after converting each with ``int()``.

    Both arguments go through ``int()``, so integer strings such as ``'57'``
    are accepted, while a decimal string such as ``'3.5'`` raises ``ValueError``.
    The result is always a float.
    """
    first, second = int(number1), int(number2)
    return (first + second) / 2.0

In [9]:
def convert_mpg(mpg_value):
    """Convert a column of MPG readings to a list of floats.

    A reading written as two numbers separated by ``/`` (the original author's
    note says these are hybrid cars with a gasoline and an electric figure) is
    replaced by the average of the first two numbers, computed by ``average``.
    Every other reading is converted with ``float()``.

    Parameters
    ----------
    mpg_value : iterable
        The readings, e.g. a pandas Series or a list. Items may be strings
        or numbers.

    Returns
    -------
    list of float
        One value per reading, in input order.

    Raises
    ------
    ValueError
        If a reading (or one side of an ``a/b`` reading) is not numeric text.
    """
    mpg_list = []

    # Iterate over the values themselves rather than `mpg_value[i]`: integer
    # look-ups on a Series are label based, so they fail (or rely on a
    # deprecated positional fallback) whenever the index is not 0..n-1.
    for raw_mpg in mpg_value:
        # Only strings can carry the "a/b" form; cells that were read as
        # numbers go straight to float() instead of failing the `in` test.
        if isinstance(raw_mpg, str) and '/' in raw_mpg:
            mpg = raw_mpg.split('/')
            mpg_list.append(average(mpg[0], mpg[1]))
        else:
            mpg_list.append(float(raw_mpg))
    return mpg_list
   
   

In [10]:
# Run convert_mpg over the city / highway / combined MPG columns of each
# model year; every result is a plain list of floats.
city_2016_list, hwy_2016_list, cmb_2016_list = [
    convert_mpg(fuel_2016_df[column]) for column in ('city_mpg', 'hwy_mpg', 'cmb_mpg')
]

city_2017_list, hwy_2017_list, cmb_2017_list = [
    convert_mpg(fuel_2017_df[column]) for column in ('city_mpg', 'hwy_mpg', 'cmb_mpg')
]

city_2018_list, hwy_2018_list, cmb_2018_list = [
    convert_mpg(fuel_2018_df[column]) for column in ('city_mpg', 'hwy_mpg', 'cmb_mpg')
]

In [11]:
def replace_all_mpg_col(fuel_mpg_df,city_mpg_list,hwy_mpg_list,cmb_mpg_list):
    """Return a copy of ``fuel_mpg_df`` whose three MPG columns hold the given values.

    Parameters
    ----------
    fuel_mpg_df : pandas.DataFrame
        Frame containing the ten columns listed in ``column_order``.
    city_mpg_list, hwy_mpg_list, cmb_mpg_list : list
        Replacement values for ``city_mpg``, ``hwy_mpg`` and ``cmb_mpg``;
        each must have one entry per row.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns in the order given by ``column_order``.
        The input frame is not modified.
    """
    column_order = ['year','model','fuel_type','vehicle_class','smog_rating','city_mpg','hwy_mpg',
                    'cmb_mpg','Greenhouse_gas_score','smartway']

    # Remove the original MPG columns first (this also yields a fresh frame).
    without_mpg_df = fuel_mpg_df.drop(columns=['city_mpg', 'hwy_mpg', 'cmb_mpg'])

    # Attach the replacement values under the same column names.
    replacements = {
        'city_mpg': city_mpg_list,
        'hwy_mpg': hwy_mpg_list,
        'cmb_mpg': cmb_mpg_list,
    }
    for column_name, values in replacements.items():
        without_mpg_df[column_name] = values

    # Put the columns into their final order.
    return without_mpg_df[column_order]



In [12]:
# Swap the converted MPG lists into the 2016 frame and preview the result.
# NOTE(review): despite the `new_city_mpg_` prefix, the frame carries all three
# converted MPG columns (city, highway and combined).
new_city_mpg_2016_df = replace_all_mpg_col(fuel_2016_df,city_2016_list,hwy_2016_list,cmb_2016_list)
new_city_mpg_2016_df.head()

Unnamed: 0,year,model,fuel_type,vehicle_class,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
0,2016,ACURA ILX,Gasoline,small car,6,25.0,36.0,29.0,7,Yes
1,2016,ACURA RLX,Gasoline,midsize car,7,28.0,32.0,30.0,7,Yes
2,2016,ACURA RLX,Gasoline,midsize car,8,28.0,32.0,30.0,7,Yes
3,2016,ACURA TLX,Gasoline,small car,6,24.0,35.0,28.0,7,Yes
4,2016,AUDI A3 Cabriolet,Gasoline,small car,9,24.0,35.0,28.0,7,Yes


In [13]:
# Swap the converted MPG lists into the 2017 frame and preview the result.
new_city_mpg_2017_df = replace_all_mpg_col(fuel_2017_df,city_2017_list,hwy_2017_list,cmb_2017_list)
new_city_mpg_2017_df.head()

Unnamed: 0,year,model,fuel_type,vehicle_class,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
0,2017,ACURA ILX,Gasoline,small car,6,25.0,35.0,29.0,7,Yes
1,2017,ACURA ILX,Gasoline,small car,6,25.0,35.0,29.0,7,Yes
2,2017,ACURA RLX,Gasoline,midsize car,8,29.0,30.0,29.0,7,Yes
3,2017,ACURA RLX,Gasoline,midsize car,8,29.0,30.0,29.0,7,Yes
4,2017,AUDI A3,Gasoline,small car,8,26.0,35.0,29.0,7,Yes


In [14]:
# Swap the converted MPG lists into the 2018 frame and preview the result.
new_city_mpg_2018_df = replace_all_mpg_col(fuel_2018_df,city_2018_list,hwy_2018_list,cmb_2018_list)
new_city_mpg_2018_df.head()

Unnamed: 0,year,model,fuel_type,vehicle_class,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
0,2018,AUDI A3 e-tron,Gasoline/Electricity,small car,7,57.5,62.5,59.5,10,Elite
1,2018,AUDI A3 e-tron,Gasoline/Electricity,small car,7,57.5,62.5,59.5,10,Elite
2,2018,AUDI A4 Ultra,Gasoline,small car,3,27.0,37.0,31.0,7,Yes
3,2018,AUDI A4 Ultra,Gasoline,small car,3,27.0,37.0,31.0,7,Yes
4,2018,BMW 328d,Diesel,small car,3,31.0,43.0,36.0,7,Yes


In [15]:
# Stack the three yearly frames (2016, 2017, 2018 in that order), renumber
# the rows from 0 and label the index 'id'.
final_mpg_df = (
    pd.concat(
        [new_city_mpg_2016_df, new_city_mpg_2017_df, new_city_mpg_2018_df],
        axis=0,
    )
    .reset_index(drop=True)
    .rename_axis('id')
)

In [16]:
# First rows of the combined frame (these come from the 2016 data).
final_mpg_df.head()

Unnamed: 0_level_0,year,model,fuel_type,vehicle_class,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,2016,ACURA ILX,Gasoline,small car,6,25.0,36.0,29.0,7,Yes
1,2016,ACURA RLX,Gasoline,midsize car,7,28.0,32.0,30.0,7,Yes
2,2016,ACURA RLX,Gasoline,midsize car,8,28.0,32.0,30.0,7,Yes
3,2016,ACURA TLX,Gasoline,small car,6,24.0,35.0,28.0,7,Yes
4,2016,AUDI A3 Cabriolet,Gasoline,small car,9,24.0,35.0,28.0,7,Yes


In [20]:
# Last rows of the combined frame (these come from the 2018 data).
final_mpg_df.tail()

Unnamed: 0_level_0,year,model,fuel_type,vehicle_class,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1423,2018,VOLVO S90,Gasoline/Electricity,midsize car,7,48.0,52.5,50.0,10,Elite
1424,2018,VOLVO XC 60,Gasoline/Electricity,small SUV,7,43.0,43.0,42.5,10,Elite
1425,2018,VOLVO XC 60,Gasoline/Electricity,small SUV,7,43.0,43.0,42.5,10,Elite
1426,2018,VOLVO XC 90,Gasoline/Electricity,standard SUV,7,44.5,45.5,44.5,10,Elite
1427,2018,VOLVO XC 90,Gasoline/Electricity,standard SUV,7,44.5,45.5,44.5,10,Elite


In [42]:
# Row counts, one per line: 2016, 2017, 2018, then the combined frame.
print(*map(len, (new_city_mpg_2016_df, new_city_mpg_2017_df,
                 new_city_mpg_2018_df, final_mpg_df)), sep='\n')

# Write the combined frame (including its 'id' index) to CSV.
file_name = 'Resources/final_mpg.csv'
final_mpg_df.to_csv(file_name)

526
490
412
1428


In [31]:

# AWS RDS connection setup.
# The password must not live in the notebook: read it from the RDS_PASSWORD
# environment variable and fall back to an interactive prompt.
# NOTE(review): a password was previously committed in this cell and in its
# saved output; it should be rotated.
password = os.environ.get('RDS_PASSWORD') or getpass.getpass('RDS password: ')

# AWS connection setup. Declare username and endpoint piece.
user = 'minalCarData'
endpoint = 'cardb.ci3ptaygzvuw.us-east-2.rds.amazonaws.com'
# NOTE(review): `args` is not referenced by any cell visible here, so the CA
# bundle is never passed to the driver. Confirm whether SSL was intended.
args = 'ssl_ca= database/config/rds-ca-2015-us-east-2-root.pem'

# "user:password@host:port/database" part of the SQLAlchemy URL. The password
# is URL-quoted so characters such as '@' or '/' cannot break the URL.
rds_connection_string = f"{user}:{quote_plus(password)}@{endpoint}:3306/fuel_economy_db"
# Show the target with the password masked so the saved output stays safe.
print(f"{user}:********@{endpoint}:3306/fuel_economy_db")

minalCarData:********@cardb.ci3ptaygzvuw.us-east-2.rds.amazonaws.com:3306/fuel_economy_db


In [32]:
# Build the engine for the RDS instance, then open a connection and an ORM
# session on it.
# NOTE(review): `conn` is never closed in the cells visible here, and a later
# cell rebinds `engine` to a local database.
engine = create_engine('mysql://' + rds_connection_string)
conn = engine.connect()
session = Session(engine)

print("connected to AWS")

connected to AWS


### Create database connection

In [33]:
# Engine for the local MySQL server (user root, database fuel_economy_db);
# the password comes from the mysql_conn module.
# Rebinding `engine` here replaces whatever engine an earlier cell created.
connection_string = f"root:{mysql_conn.password}@localhost/fuel_economy_db"
engine = create_engine('mysql://' + connection_string)


In [33]:
# Confirm which tables exist in the connected database.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()


['fuel']

In [56]:
# Collect every id / model pair from the `fuel` table.
# Only the two columns that are used are selected. The query runs on an
# explicit connection that is released when the `with` block exits
# (Engine.execute() is removed in SQLAlchemy 2.0).
with engine.connect() as connection:
    result = connection.execute(text('SELECT id, model FROM fuel')).fetchall()

ids = [row.id for row in result]
models = [row.model for row in result]
models_blob = {'id': ids, 'model': models}
print(models_blob)


{'id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 

In [48]:
# Read the whole `fuel` table back into a DataFrame.
names= pd.read_sql_query('select * from fuel', con=engine)

In [51]:
# Last rows as stored in the database.
# NOTE(review): the recorded output ends at id 1429 while final_mpg_df has
# 1428 rows. Possibly a leftover row from an earlier load; verify.
names.tail()

Unnamed: 0,id,year,model,vehicle_class,fuel_type,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
1424,1425,2018,VOLVO S90,midsize car,Gasoline/Electricity,7,48.0,52.5,50.0,10,Elite
1425,1426,2018,VOLVO XC 60,small SUV,Gasoline/Electricity,7,43.0,43.0,42.5,10,Elite
1426,1427,2018,VOLVO XC 60,small SUV,Gasoline/Electricity,7,43.0,43.0,42.5,10,Elite
1427,1428,2018,VOLVO XC 90,standard SUV,Gasoline/Electricity,7,44.5,45.5,44.5,10,Elite
1428,1429,2018,VOLVO XC 90,standard SUV,Gasoline/Electricity,7,44.5,45.5,44.5,10,Elite


In [50]:
# Pull the id and model columns out of `names` as plain Python lists.
# The earlier loop appended the entire `names['model']` Series on every
# iteration (the `[i]` was missing), so `models` held len(names) copies of
# the column instead of one model name per row.
print(len(names))
ids = names['id'].tolist()
models = names['model'].tolist()
print (ids)
print(models)

1429
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 22

### Load DataFrames into database

In [36]:
# Declarative base class that the ORM model below derives from.
Base = declarative_base()
# ORM session bound to the current `engine`; this rebinds `session` if an
# earlier cell already created one.
session = Session(bind=engine)

In [37]:
# Object relational mapping for our table
# ----------------------------------
class Fuel(Base):
    """ORM mapping for one row of the ``fuel`` table.

    Apart from ``id``, the attributes match the columns of ``final_mpg_df``,
    which the load cell copies into instances of this class.
    """
    __tablename__ = 'fuel'
    # Primary key; the load cell never sets it, so the database is relied on
    # to assign it.
    id = Column(Integer, primary_key=True)
    year = Column(Integer)
    model = Column(String(255))
    vehicle_class = Column(String(255))
    fuel_type = Column(String(255))
    smog_rating = Column(Integer)
    # The MPG columns are floats because "a/b" readings are averaged and can
    # end in .5.
    city_mpg  = Column(Float)
    hwy_mpg  = Column(Float)
    cmb_mpg = Column(Float)
    Greenhouse_gas_score = Column(Integer)
    smartway = Column(String(255))

In [38]:
# Load the combined frame into the `fuel` table through the ORM.
# All rows are added and committed in ONE transaction: committing per row
# costs a round trip each time and leaves a partial load behind if a row fails.
# NOTE: this cell only ever inserts, so re-running it appends the same rows again.
maxRows = 10000  # upper bound on the number of rows written by one run
fuel_columns = ['year', 'model', 'fuel_type', 'vehicle_class', 'smog_rating',
                'city_mpg', 'hwy_mpg', 'cmb_mpg', 'Greenhouse_gas_score', 'smartway']

start_time = datetime.datetime.now()

# One Fuel instance per row; the dict keys match the mapped attribute names.
records = [
    Fuel(**row_values)
    for row_values in final_mpg_df[fuel_columns].head(maxRows).to_dict('records')
]

try:
    session.add_all(records)
    session.commit()
except Exception:
    # Leave the session usable and the table unchanged when the load fails.
    session.rollback()
    raise

c = len(records)  # number of rows written

end_time = datetime.datetime.now()
print(end_time - start_time)

0:00:02.446648


In [39]:
#pd.read_sql_query('select * from Fuel', con=engine)

Unnamed: 0,id,year,model,vehicle_class,fuel_type,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway
0,1,2016,ACURA ILX,small car,Gasoline,6,25,36,29,7,Yes
1,2,2016,ACURA RLX,midsize car,Gasoline,7,28,32,30,7,Yes
2,3,2016,ACURA RLX,midsize car,Gasoline,8,28,32,30,7,Yes
3,4,2016,ACURA TLX,small car,Gasoline,6,24,35,28,7,Yes
4,5,2016,AUDI A3 Cabriolet,small car,Gasoline,9,24,35,28,7,Yes
5,6,2016,AUDI A3 Cabriolet,small car,Gasoline,9,24,35,28,7,Yes
6,7,2016,AUDI A3 e-tron,small car,Gasoline/Electricity,9,57,62,59,10,Elite
7,8,2016,AUDI A3 e-tron,small car,Gasoline/Electricity,8,57,62,59,10,Yes
8,9,2016,AUDI A3 e-tron ultra,small car,Gasoline/Electricity,9,62,64,62,10,Elite
9,10,2016,AUDI A3 e-tron ultra,small car,Gasoline/Electricity,8,62,64,62,10,Yes


In [None]:



# Reflect the existing database into a new model.
# NOTE(review): `automap_base` is not imported in the import cell of this
# notebook, and this cell has no execution count. It looks like a leftover
# from another project; confirm before running.
# NOTE(review): rebinding `Base` replaces the declarative base that the Fuel
# class was built on.

Base = automap_base()

# Reflect the table.

Base.prepare(engine, reflect=True)

# Save a reference to the ranks table as "Ranks".
# NOTE(review): the table listing earlier in the notebook shows only 'fuel',
# so `Base.classes.ranks` presumably does not exist in this database. Verify.

Ranks = Base.classes.ranks