# Classics, Collectibles, & Popular Cars PSQL Database 

In [None]:
import psycopg2
import pandas as pd

In [None]:
# Connection settings for the local PostgreSQL server.
# Enter the password of your own PostgreSQL installation before running.
db_settings = {
    "host": "localhost",
    "database": "cars",
    "port": "5432",
    "user": "postgres",
    "password": "",  # INPUT PW HERE
}
conn = psycopg2.connect(**db_settings)

In [None]:
# Open a cursor on the connection; the SQL statements below are executed through it.
cur = conn.cursor()

In [None]:
# If at any time a failed or incomplete query leaves the session in an error state, uncomment and run the line below.
# cur.execute("ROLLBACK")

### Execute car_make.sql

In [None]:
# Rebuild car_make from scratch: drop any existing table, then run the
# statements stored in car_make.sql.
sql = 'DROP TABLE IF EXISTS car_make;'
try:
    cur.execute(sql)
except psycopg2.Error as err:
    # A failed statement leaves the transaction aborted; roll back so the
    # statements that follow are not rejected as well.
    conn.rollback()
    print("error dropping table:", err)

try:
    # The context manager closes the script file even if reading or
    # executing it fails.
    with open('car_make.sql', 'r') as sql_file:
        cur.execute(sql_file.read())
except (OSError, UnicodeError) as err:
    # The file is missing or unreadable; nothing was sent to the database.
    print('error executing sql file:', err)
except psycopg2.Error as err:
    conn.rollback()
    print('error executing sql file:', err)

In [None]:
# Commit the open transaction so the car_make changes are stored in the database.
conn.commit()

### Test connection

In [None]:
# Query every row of car_make; the result stays on the cursor until it is fetched.
cur.execute("SELECT * from car_make;")

In [None]:
# Fetch and print all rows produced by the most recent query on the cursor.
print(cur.fetchall())

### Create car_name Table

In [None]:
# Load car_name.csv; its rows are inserted into the car_name table below.
df = pd.read_csv('car_name.csv')

In [None]:
# Preview the first five rows of the CSV data.
df.head(5)

In [None]:
# Drop any previous car_name table so it can be rebuilt from the CSV data.
sql = 'DROP TABLE IF EXISTS car_name;'
try:
    cur.execute(sql)
except psycopg2.Error as err:
    # A failed statement leaves the transaction aborted; roll back so the
    # statements that follow are not rejected as well.
    conn.rollback()
    print('error dropping table:', err)

In [None]:
# Create car_name, insert one row per CSV record, and mirror every statement
# that PostgreSQL accepted into car_name.sql.
sql = '''CREATE TABLE IF NOT EXISTS car_name (id SERIAL PRIMARY KEY, name VARCHAR(30) NOT NULL, make_id INTEGER NOT NULL);'''
insert_sql = "INSERT INTO car_name (name, make_id) VALUES (%s, %s);"
# mogrify() returns bytes in the connection's client encoding; look up the
# matching Python codec so the statements can be written as text.
client_encoding = psycopg2.extensions.encodings[conn.encoding]

# The context manager closes the file even if something below raises.
with open("car_name.sql", "w") as f:
    try:
        cur.execute(sql)
        f.write(sql)
    except psycopg2.Error as err:
        conn.rollback()
        print("create table error:", err)

    for _, car in df.iterrows():
        try:
            # Bind the values as parameters so names containing quotes are
            # escaped correctly instead of breaking the statement.
            statement = cur.mogrify(insert_sql, (car.car_name, int(car.make_id)))
        except (TypeError, ValueError, psycopg2.Error) as err:
            # Rendering happens client-side, so the transaction is untouched.
            print('insert error:', err)
            continue

        try:
            # The savepoint confines a failure to this row; without it one
            # bad row aborts the transaction and every later INSERT fails.
            cur.execute("SAVEPOINT car_name_row;")
            cur.execute(statement)
            cur.execute("RELEASE SAVEPOINT car_name_row;")
        except psycopg2.Error as err:
            print('insert error:', err)
            try:
                cur.execute("ROLLBACK TO SAVEPOINT car_name_row;")
            except psycopg2.Error:
                # The savepoint could not be created because the transaction
                # was already aborted; reset it completely.
                conn.rollback()
            continue

        f.write('\n')
        f.write(statement.decode(client_encoding))

In [None]:
# Commit the open transaction so the car_name table and its rows are stored.
conn.commit()

### Add car_models table

In [None]:
# Read the car_name table back from PostgreSQL. Its `id` column is exposed as
# `name_id`, the column name used by the car_models table.
sql = '''SELECT * FROM car_name'''
df_make = pd.read_sql_query(sql, conn).rename(columns={'id': 'name_id'})

In [None]:
# Load the raw model data and tidy it before joining it with car_name.
df_models = pd.read_csv('car_models.csv')
# Blank `name` cells inherit the closest value above them. Series.ffill()
# replaces fillna(method='ffill'), which is deprecated in recent pandas.
df_models['name'] = df_models['name'].ffill()
# Rows without a max_year get the placeholder 9999 so the column can be an int.
df_models['max_year'] = df_models['max_year'].fillna(9999).astype(int)
# The dimension and weight columns are not part of the car_models table.
df_models = df_models.drop(columns=['width', 'height', 'length', 'weight'])

In [None]:
# Attach each model's name_id (and make_id) by matching on the car name;
# a left join keeps every row of df_models.
df_models = pd.merge(df_models, df_make, on='name', how='left')

In [None]:
# Sanity-check the merge. A left join never drops rows, so problems show up
# in two other ways: duplicate names in car_name add rows, and model names
# with no match in car_name leave name_id empty.
assert len(df_models) == len(pd.read_csv('car_models.csv')), \
    "merge changed the row count: car_name contains duplicate names"
unmatched = df_models.loc[df_models['name_id'].isna(), 'name'].unique()
assert len(unmatched) == 0, f"no car_name match for: {list(unmatched)}"
df_models.head(5)

In [None]:
# Start car_models.sql and rebuild the car_models table. Each statement is
# written to the file only after PostgreSQL has accepted it.
# The file handle `f` is left open here: the following cells append the
# INSERT statements to it and then close it.
f = open("car_models.sql", "w")

sql = "DROP TABLE IF EXISTS car_models;"
try:
    cur.execute(sql)
    f.write(sql)
except psycopg2.Error as err:
    # A failed statement leaves the transaction aborted; roll back so the
    # statements that follow are not rejected as well.
    conn.rollback()
    print('error dropping table:', err)

sql = '''CREATE TABLE IF NOT EXISTS car_models (
    id SERIAL PRIMARY KEY, 
    name_id INTEGER NOT NULL,
    edition VARCHAR(30) NOT NULL, 
    min_year INTEGER NOT NULL,
    max_year INTEGER,
    hp INTEGER,
    torque INTEGER,
    body VARCHAR(20),
    door INTEGER,
    cylinder INTEGER,
    displacement FLOAT4,
    aspiration VARCHAR(15),
    transmission VARCHAR(5),
    drive VARCHAR(5)
    );'''
try:
    cur.execute(sql)
    f.write('\n')
    f.write(sql)
except psycopg2.Error as err:
    conn.rollback()
    print('error creating table:', err)
    

In [None]:
def _sql_value(value):
    """Return *value* in a form psycopg2 can bind.

    Missing values (NaN/None) become None, which is sent as SQL NULL;
    NumPy scalars are unwrapped to plain Python scalars.
    """
    if pd.isna(value):
        return None
    return value.item() if hasattr(value, "item") else value


# Insert every row of df_models into car_models and append each statement
# that PostgreSQL accepted to the open car_models.sql file `f`.
insert_sql = """INSERT INTO car_models (name_id, edition, min_year, max_year, hp, torque, body, door, cylinder, displacement, aspiration, transmission, drive) 
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"""
# DataFrame columns in the order of the placeholders above; the DataFrame's
# `trans` column feeds the table's `transmission` column.
source_columns = ['name_id', 'edition', 'min_year', 'max_year', 'hp', 'torque',
                  'body', 'door', 'cylinder', 'displacement', 'aspiration',
                  'trans', 'drive']
# mogrify() returns bytes in the connection's client encoding; look up the
# matching Python codec so the statements can be written as text.
client_encoding = psycopg2.extensions.encodings[conn.encoding]

for _, car in df_models.iterrows():
    params = tuple(_sql_value(car[column]) for column in source_columns)
    try:
        # Bind the values as parameters: quotes are escaped and missing
        # values become NULL instead of the text 'nan' or a bare nan token.
        statement = cur.mogrify(insert_sql, params)
    except psycopg2.Error as err:
        # Rendering happens client-side, so the transaction is untouched.
        print(f"""insert error for {car['name']}, {car.edition}: {err}""")
        continue

    try:
        # The savepoint confines a failure to this row; without it one bad
        # row aborts the transaction and every later INSERT fails.
        cur.execute("SAVEPOINT car_models_row;")
        cur.execute(statement)
        cur.execute("RELEASE SAVEPOINT car_models_row;")
    except psycopg2.Error as err:
        print(f"""insert error for {car['name']}, {car.edition}: {err}""")
        try:
            cur.execute("ROLLBACK TO SAVEPOINT car_models_row;")
        except psycopg2.Error:
            # The savepoint could not be created because the transaction was
            # already aborted; reset it completely.
            conn.rollback()
        continue

    f.write('\n')
    f.write(statement.decode(client_encoding))

In [None]:
# Close the SQL script file opened earlier now that all statements are written.
f.close()

In [None]:
# Read the car_models table back from the database and preview the first five rows.
sql = "SELECT * FROM car_models"
df_test= pd.read_sql_query(sql,conn)
df_test.head(5)

In [None]:
# The table read back from PostgreSQL must contain one row per DataFrame row.
assert len(df_test) == len(df_models)

In [None]:
# Commit the open transaction so the car_models table and its rows are stored.
conn.commit()

### Compile all tables into full DataFrame

In [None]:
# Join the three tables (car_models -> car_name via name_id, car_name ->
# car_make via make_id) into a single DataFrame and display it.
sql = """SELECT make, name, edition, min_year, max_year, hp, torque, 
    cylinder, displacement, aspiration,transmission, drive, body, door 
    FROM car_models AS M 
    JOIN car_name AS N ON M.name_id = N.id 
    JOIN car_make AS MA ON N.make_id = MA.id;"""
df_return = pd.read_sql_query(sql, conn)
df_return

### End Session

In [None]:
# Close the cursor first, then the database connection.
cur.close()
conn.close()

### Run Cells below to delete all tables

In [None]:
# sql = """SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"""
# cur.execute(sql)

# for table in cur.fetchall():
#     try:
#         sql = f"DROP TABLE IF EXISTS {table[0]};"
#         cur.execute(sql)
#         print(sql)
#     except:
#         print('error dropping tables')

In [None]:
# conn.commit()