In [1]:
# Import dependencies
import pandas as pd
import psycopg2
import psycopg2.sql as sql

from pathlib import Path
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from sqlalchemy import create_engine

In [2]:
# Location of the rated-routes CSV, relative to the notebook's working directory
data_path = Path('data') / 'routes_rated.csv'

# Load the CSV contents into a pandas dataframe
routes_df = pd.read_csv(data_path)

In [3]:
# Preview the first five rows of the loaded dataframe
routes_df.head(n=5)

Unnamed: 0,name_id,country,crag,sector,name,tall_recommend_sum,grade_mean,cluster,rating_tot
0,0,and,montserrat,prohibitivo,diagonal de la x,-1,49.25,3,-0.045211
1,1,and,montserrat,prohibitivo,mehir,-1,49.0,2,0.116464
2,2,and,montserrat,prohibitivo,pas de la discordia,0,49.0,2,0.178722
3,3,and,tartareu,bombo suis,tenedor libre,0,44.333333,3,0.158449
4,4,arg,bandurrias,rincon,tendinitis,1,48.5,0,0.075797


In [4]:
# Normalise text casing: country codes are upper-cased,
# while crag, sector and route names are title-cased
routes_df['country'] = routes_df['country'].str.upper()
for title_col in ('crag', 'sector', 'name'):
    routes_df[title_col] = routes_df[title_col].str.title()
routes_df.head()

Unnamed: 0,name_id,country,crag,sector,name,tall_recommend_sum,grade_mean,cluster,rating_tot
0,0,AND,Montserrat,Prohibitivo,Diagonal De La X,-1,49.25,3,-0.045211
1,1,AND,Montserrat,Prohibitivo,Mehir,-1,49.0,2,0.116464
2,2,AND,Montserrat,Prohibitivo,Pas De La Discordia,0,49.0,2,0.178722
3,3,AND,Tartareu,Bombo Suis,Tenedor Libre,0,44.333333,3,0.158449
4,4,ARG,Bandurrias,Rincon,Tendinitis,1,48.5,0,0.075797


In [20]:
# Create a new column called "style" which indicates if the route is preferred by short or tall climbers
def style(x):
    """Label a route from the sign of its tall-recommendation score.

    Args:
        x: Numeric score (the notebook passes values of the
            ``tall_recommend_sum`` column).

    Returns:
        'Tall' when ``x`` is positive, 'Short' when ``x`` is negative and
        'Neutral' otherwise (zero, or any value for which both comparisons
        are false, such as NaN).
    """
    if x > 0:
        return 'Tall'
    if x < 0:
        return 'Short'
    return 'Neutral'

# Derive the categorical "style" label from the numeric tall_recommend_sum score
tall_scores = routes_df['tall_recommend_sum']
routes_df['style'] = tall_scores.apply(style)
routes_df.head()

Unnamed: 0,name_id,country,crag,sector,name,tall_recommend_sum,grade_mean,cluster,rating_tot,style
0,0,AND,Montserrat,Prohibitivo,Diagonal De La X,-1,49.25,3,-0.045211,Short
1,1,AND,Montserrat,Prohibitivo,Mehir,-1,49.0,2,0.116464,Short
2,2,AND,Montserrat,Prohibitivo,Pas De La Discordia,0,49.0,2,0.178722,Neutral
3,3,AND,Tartareu,Bombo Suis,Tenedor Libre,0,44.333333,3,0.158449,Neutral
4,4,ARG,Bandurrias,Rincon,Tendinitis,1,48.5,0,0.075797,Tall


In [None]:
# The numeric score is now represented by the "style" column, so drop it
routes_df = routes_df.drop(columns=['tall_recommend_sum'])
routes_df.head()

In [None]:
# Convert grade_mean from float to int.
# Note: the cast discards the fractional part (e.g. 49.25 becomes 49); it does not round.
routes_df = routes_df.astype({'grade_mean': int})
routes_df.head()

In [None]:
# Connect to the postgres server and create the project database.
# CREATE DATABASE cannot run inside a transaction block, so the connection
# is switched to autocommit before the statement is executed.
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment before sharing this notebook.
database_name = "climber_db"
conn = None
try:
    conn = psycopg2.connect("user=postgres password = 'admin'")
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    with conn.cursor() as cursor:
        # Compose the statement with a quoted identifier rather than string concatenation
        sqlCreateDatabase = sql.SQL("CREATE DATABASE {};").format(sql.Identifier(database_name))
        cursor.execute(sqlCreateDatabase)
except psycopg2.Error as error:
    # Only database errors are handled (not KeyboardInterrupt etc.), and the
    # underlying cause is shown, e.g. server unreachable or database already exists
    print('Database connection not successful')
    print(error)
finally:
    # This administrative connection is not needed once the database exists
    if conn is not None:
        conn.close()

In [None]:
# Set connection to new created database using psycopg2
try:
    conn = psycopg2.connect(database = "climber_db", user = "postgres", password = "admin", host = "localhost", port = "5432")
except:
    print("Database connection not successful") 

In [None]:
# Create the route_ratings table using the psycopg2 connection
cursor = conn.cursor()
# IF NOT EXISTS keeps this cell safe to re-run against an existing database
table_creation = '''
   CREATE TABLE IF NOT EXISTS route_ratings (
       name_id INT PRIMARY KEY,
       country VARCHAR(3),
       crag TEXT NOT NULL,
       sector TEXT NOT NULL,
       name TEXT NOT NULL,
       grade_mean INT,
       cluster INT,
       rating_tot FLOAT,
       style TEXT NOT NULL
   )
'''
# The connection context manager commits on success and rolls back on error,
# so a failed statement does not leave the session in an aborted transaction.
# It does not close the connection; `cursor` stays usable for later cells.
with conn:
    cursor.execute(table_creation)

In [None]:
# Append the dataframe rows to the route_ratings table through a SQLAlchemy engine;
# the dataframe index is not written as a column
engine = create_engine('postgresql://postgres:admin@localhost:5432/climber_db')
routes_df.to_sql(name='route_ratings', con=engine, if_exists='append', index=False)

In [None]:
# Query the table back to confirm the data was inserted
try:
    cursor.execute('SELECT * from route_ratings;')
    route_data = cursor.fetchall()
    conn.commit()
finally:
    # Always release the cursor and connection, even if the query fails
    cursor.close()
    conn.close()

# Print every fetched row
for row in route_data:
    print(row)