In [1]:
import pandas as pd

# schalchemy dependencies
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship

# Import PyMySQL (Not needed if mysqlclient is installed)
import pymysql
pymysql.install_as_MySQLdb()

In [2]:
# Import and establish Base for which classes will be constructed 

Base = declarative_base()

In [3]:
car_sales_df = pd.read_csv('../Resources/car_combined_test_train.csv')

car_df = car_sales_df.rename(columns={'Resale-Cat':'Resale_Cat','Index': 'index'})
car_df.head()

Unnamed: 0,index,Resale_Cat,fuel_type,vehicle_class,displ,cyl,trans,drive,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway,comb_CO2,Sales_2016,Sales_2017,Sales_2018
0,0,Low,Gasoline,large car,3.0,6,SemiAuto-8,2WD,6,21.0,29.0,24.0,5,No,371.0,12071,8731,7909
1,1,Low,Gasoline,large car,3.0,6,SemiAuto-8,2WD,6,21.0,29.0,24.0,5,No,371.0,12071,8731,7909
2,2,Low,Gasoline,large car,4.4,8,SemiAuto-8,2WD,5,17.0,25.0,20.0,4,No,436.0,12071,8731,7909
3,3,Low,Gasoline,large car,4.4,8,SemiAuto-8,2WD,6,17.0,25.0,20.0,4,No,436.0,12071,8731,7909
4,4,Low,Gasoline,large car,4.4,8,SemiAuto-8,4WD,5,16.0,25.0,19.0,4,No,459.0,12071,8731,7909


In [4]:
#Reading the dataset
carvalidate_df = pd.read_csv('../Resources/car_data_combined_validation.csv')

carvalidate_df.head()

Unnamed: 0,Index,Resale_Cat,model,fuel_type,vehicle_class,displ,cyl,trans,drive,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway,comb_CO2,Sales_2016,Sales_2017,Sales_2018
0,4,Low,BMW 750i,Gasoline,large car,4.4,8,SemiAuto-8,4WD,5,16,25,19,4,No,459,12071,8731,7909
1,65,Low,FIAT 500L,Gasoline,station wagon,1.4,4,Man-6,2WD,6,25,33,28,7,Yes,312,14380,11467,4916
2,119,Low,JAGUAR XJ,Gasoline,large car,3.0,6,SemiAuto-8,2WD,9,18,27,21,5,No,414,3542,2553,1348
3,132,Low,NISSAN Leaf,Electricity,midsize car,0.0,0,Auto-1,2WD,10,126,101,114,10,Elite,0,13251,10458,14565
4,139,High,TOYOTA 4Runner,Gasoline,standard SUV,4.0,6,SemiAuto-5,4WD,6,17,21,18,4,No,478,104456,119597,130025


In [5]:
train_df  = pd.read_csv("../Resources/car_sales_encoded.csv")

train_df.head()

Unnamed: 0,Resale-Cat,fuel_type,vehicle_class,trans,drive,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway,comb_CO2,Sales_2016,Sales_2017,Sales_2018
0,1,3,0,7,0,5,17.0,25.0,1,4,1,436.0,12071,8731,7909
1,1,3,0,7,0,6,17.0,25.0,1,4,1,436.0,12071,8731,7909
2,1,3,0,7,1,5,16.0,25.0,1,4,1,459.0,12071,8731,7909
3,0,3,2,2,0,6,20.0,27.0,1,5,1,397.0,103217,106583,20848
4,0,3,2,2,0,6,20.0,27.0,1,5,1,397.0,103217,106583,20848


In [6]:
test_df = pd.read_csv("../Resources/encoded_output.csv")

test_df

Unnamed: 0,Resale_Cat,fuel_type,vehicle_class,trans,drive,smog_rating,city_mpg,hwy_mpg,cmb_mpg,Greenhouse_gas_score,smartway,comb_CO2,Sales_2016,Sales_2017,Sales_2018
0,1,1,0,6,1,5,16,25,1,4,1,459,12071,8731,7909
1,1,1,5,4,0,6,25,33,2,7,2,312,14380,11467,4916
2,1,1,0,6,0,9,18,27,1,5,1,414,3542,2553,1348
3,1,0,1,0,0,10,126,101,0,10,0,0,13251,10458,14565
4,0,1,4,5,1,6,17,21,1,4,1,478,104456,119597,130025
5,0,1,2,3,0,5,16,22,1,3,1,497,207299,204211,30244
6,0,1,3,1,1,6,17,21,1,3,1,481,180991,179188,228293
7,0,1,2,2,0,6,19,26,1,5,1,410,23666,32068,28486


In [7]:
# Import modules to declare columns and column data types
from sqlalchemy import Column, Integer, String, Float

In [8]:
# Create All classes
# ----------------------------------
class Car(Base):
    __tablename__ = 'car'
    index = Column(Integer, primary_key=True)
    Resale_Cat = Column(String(255))
    model = Column(String(255))
    vehicle_class = Column(String(255))
    Fuel_type = Column(String(255))
    displ = Column(Float)
    cyl = Column(Integer)
    trans= Column(String(255))
    drive = Column(String(255))
    smog_rating = Column(Integer)
    city_mpg  = Column(Float)
    hwy_mpg  = Column(Float)
    cmb_mpg = Column(Float)
    Greenhouse_gas_score = Column(Integer)
    smartway = Column(String(255))
    comb_CO2 =Column(Float)
    Sales_2016 =Column(Integer)
    Sales_2017 = Column(Integer)
    Sales_2018=Column(Integer)


# ----------------------------------
class CarValidate(Base):
    __tablename__ = 'carvalidate'
    index = Column(Integer, primary_key=True)
    Resale_Cat = Column(String(255))
    model = Column(String(255))
    vehicle_class = Column(String(255))
    Fuel_type = Column(String(255))
    displ = Column(Float)
    cyl = Column(Integer)
    trans= Column(String(255))
    drive = Column(String(255))
    smog_rating = Column(Integer)
    city_mpg  = Column(Float)
    hwy_mpg  = Column(Float)
    cmb_mpg = Column(Float)
    Greenhouse_gas_score = Column(Integer)
    smartway = Column(String(255))
    comb_CO2 =Column(Float)
    Sales_2016 =Column(Integer)
    Sales_2017 = Column(Integer)
    Sales_2018=Column(Integer)

# ----------------------------------
class Train(Base):
    __tablename__ = 'train'
    id = Column(Integer, primary_key=True)
    Resale_Cat =Column(Integer)
    vehicle_class = Column(Integer)
    Fuel_type = Column(Integer)
    
    trans= Column(Integer)
    drive = Column(Integer)
    smog_rating = Column(Integer)
    city_mpg  = Column(Float)
    hwy_mpg  = Column(Float)
    cmb_mpg = Column(Integer)
    Greenhouse_gas_score = Column(Integer)
    smartway = Column(Integer)
    comb_CO2 =Column(Float)
    Sales_2016 =Column(Integer)
    Sales_2017 = Column(Integer)
    Sales_2018=Column(Integer)
# ----------------------------------
class Test(Base):
    __tablename__ = 'test'
    id = Column(Integer, primary_key=True)
    Resale_Cat =Column(Integer)
    vehicle_class = Column(Integer)
    Fuel_type = Column(Integer)
    
    trans= Column(Integer)
    drive = Column(Integer)
    smog_rating = Column(Integer)
    city_mpg  = Column(Float)
    hwy_mpg  = Column(Float)
    cmb_mpg = Column(Integer)
    Greenhouse_gas_score = Column(Integer)
    smartway = Column(Integer)
    comb_CO2 =Column(Float)
    Sales_2016 =Column(Integer)
    Sales_2017 = Column(Integer)
    Sales_2018=Column(Integer)

In [9]:
# Create Database Connection
# ----------------------------------
# Establish Connection to MySQL
engine = create_engine("sqlite:///fuel_economy.sqlite")
conn = engine.connect()

In [10]:
# Create Fuel table within the database
Base.metadata.create_all(conn)

In [11]:
car_df.to_sql("car", engine, if_exists='append', index=False)
carvalidate_df.to_sql("carvalidate", engine, if_exists='append', index=False)

OperationalError: (sqlite3.OperationalError) table carvalidate has no column named model [SQL: 'INSERT INTO carvalidate ("Index", "Resale_Cat", model, fuel_type, vehicle_class, displ, cyl, trans, drive, smog_rating, city_mpg, hwy_mpg, cmb_mpg, "Greenhouse_gas_score", smartway, "comb_CO2", "Sales_2016", "Sales_2017", "Sales_2018") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'] [parameters: ((4, 'Low', 'BMW 750i', 'Gasoline', 'large car', 4.4, 8, 'SemiAuto-8', '4WD', 5, 16, 25, 19, 4, 'No', 459, 12071, 8731, 7909), (65, 'Low', 'FIAT 500L', 'Gasoline', 'station wagon', 1.4, 4, 'Man-6', '2WD', 6, 25, 33, 28, 7, 'Yes', 312, 14380, 11467, 4916), (119, 'Low', 'JAGUAR XJ', 'Gasoline', 'large car', 3.0, 6, 'SemiAuto-8', '2WD', 9, 18, 27, 21, 5, 'No', 414, 3542, 2553, 1348), (132, 'Low', 'NISSAN Leaf', 'Electricity', 'midsize car', 0.0, 0, 'Auto-1', '2WD', 10, 126, 101, 114, 10, 'Elite', 0, 13251, 10458, 14565), (139, 'High', 'TOYOTA 4Runner', 'Gasoline', 'standard SUV', 4.0, 6, 'SemiAuto-5', '4WD', 6, 17, 21, 18, 4, 'No', 478, 104456, 119597, 130025), (276, 'High', 'GMC Sierra 15', 'Gasoline', 'pickup', 5.3, 8, 'Auto-8', '2WD', 5, 16, 22, 18, 3, 'No', 497, 207299, 204211, 30244), (296, 'High', 'JEEP Wrangler', 'Gasoline', 'small SUV', 3.6, 6, 'Auto-5', '4WD', 6, 17, 21, 18, 3, 'No', 481, 180991, 179188, 228293), (287, 'High', 'HONDA Ridgeline', 'Gasoline', 'pickup', 3.5, 6, 'Auto-6', '2WD', 6, 19, 26, 22, 5, 'No', 410, 23666, 32068, 28486))] (Background on this error at: http://sqlalche.me/e/e3q8)

In [None]:
train_df.to_sql("train", engine, if_exists='append', index=False)
test_df.to_sql("test", engine, if_exists='append', index=False)

In [None]:
# To push the objects made and query the server we use a 
#Session object
from sqlalchemy.orm import Session
session = Session(bind=engine)

In [None]:
engine.execute("SELECT * FROM car LIMIT 1").fetchall()

In [None]:
engine.execute("SELECT * FROM carvalidate LIMIT 1").fetchall()

In [None]:
engine.execute("SELECT * FROM train LIMIT 1").fetchall()

In [None]:
engine.execute("SELECT * FROM test LIMIT 1").fetchall()

In [None]:
test_data = engine.execute("SELECT * FROM test").fetchall()

In [None]:
print(test_data)

In [None]:
engine = create_engine("sqlite:///fuel_economy.sqlite")
conn = engine.connect()

In [None]:
df = pd.read_sql_query("SELECT * FROM test",conn)

In [None]:
df

In [None]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import os
import pickle

In [None]:
filename = '../Model/finalized_smartway_model_KN.sav'
# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))

In [None]:
new_df = pd.read_sql_query("SELECT * FROM test", conn)

In [None]:
newX = new_df.drop(['id','smartway'], axis=1)
newy = new_df['smartway']
print(newX)

In [None]:
result = loaded_model.score(newX, newy)
ynew = loaded_model.predict(newX)

In [None]:
print(ynew)