In [1]:
# import libraries
import pandas as pd
import sqlalchemy as db

In [2]:
# Load the survey results into a DataFrame.
# Replace the file name with the full path to your copy of the CSV. On Windows,
# use forward slashes or a raw string (r"C:\path\to\file.csv") so backslashes
# are not interpreted as escape sequences.
df = pd.read_csv("mxmh_survey_results.csv")

In [3]:
# Add an 'age_group' column that buckets 'Age' into labelled bands.
# pd.cut uses right-closed intervals by default, i.e. (0, 18], (18, 35], ...;
# ages that fall outside (0, 100] end up as NaN.
age_bin_edges = [0, 18, 35, 60, 75, 100]
age_bin_labels = ['Early Years', 'Young Adults', 'Middle Age', 'Mature Adults', 'Elderly']
df['age_group'] = pd.cut(df['Age'], bins=age_bin_edges, labels=age_bin_labels)

In [None]:
# Normalise the column names: lower case, spaces replaced by underscores,
# square brackets removed, and 'r&b' spelled 'rnb'.

# Column names that get renamed
keys = [
    "Timestamp",
    "Age",
    "Primary streaming service",
    "Hours per day",
    "While working",
    "Instrumentalist",
    "Composer",
    "Fav genre",
    "Exploratory",
    "Foreign languages",
    "BPM",
    "Frequency [Classical]",
    "Frequency [Country]",
    "Frequency [EDM]",
    "Frequency [Folk]",
    "Frequency [Gospel]",
    "Frequency [Hip hop]",
    "Frequency [Jazz]",
    "Frequency [K pop]",
    "Frequency [Latin]",
    "Frequency [Lofi]",
    "Frequency [Metal]",
    "Frequency [Pop]",
    "Frequency [R&B]",
    "Frequency [Rap]",
    "Frequency [Rock]",
    "Frequency [Video game music]",
    "Anxiety",
    "Depression",
    "Insomnia",
    "OCD",
    "Music effects",
    "Permissions",
]

# Map every original name to its normalised form
# (e.g. "Frequency [Hip hop]" -> "frequency_hip_hop").
strip_brackets = str.maketrans("", "", "[]")
formatted_keys = {
    original: original.lower().replace(" ", "_").translate(strip_brackets)
    for original in keys
}

# Apply the mapping, then spell out the ampersand in the R&B column name.
df = (
    df.rename(columns=formatted_keys)
      .rename(columns={'frequency_r&b': 'frequency_rnb'})
)

In [None]:
# Create the SQLAlchemy engine for PostgreSQL.
# Replace username, password, host, port and db_name in the URL below with
# your own connection settings before running this cell.
# NOTE(review): avoid committing real credentials with the notebook.
connection_url = 'postgresql://username:password@host:port/db_name'
engine = db.create_engine(connection_url)

# Raw DBAPI connection obtained from the engine.
conn = engine.raw_connection()

In [None]:
# Create the 'survey' table in PostgreSQL if it does not exist yet.
# NOTE(review): the column types below (e.g. INTEGER for the frequency_* and
# mental-health columns) are assumed to match the CSV values — TODO confirm
# against the actual data.

# A single CREATE TABLE statement (a plain string, not a sequence of commands).
commands = ('''CREATE TABLE IF NOT EXISTS survey (
    id SERIAL PRIMARY KEY,
    timestamp VARCHAR,
    age INTEGER,
    primary_streaming_service VARCHAR,
    hours_per_day FLOAT,
    while_working VARCHAR,
    instrumentalist VARCHAR,
    composer VARCHAR,
    fav_genre VARCHAR,
    exploratory VARCHAR,
    foreign_languages VARCHAR,
    bpm INTEGER,
    frequency_classical INTEGER,
    frequency_country INTEGER,
    frequency_edm INTEGER,
    frequency_folk INTEGER,
    frequency_gospel INTEGER,
    frequency_hip_hop INTEGER,
    frequency_jazz INTEGER,
    frequency_k_pop INTEGER,
    frequency_latin INTEGER,
    frequency_lofi INTEGER,
    frequency_metal INTEGER,
    frequency_pop INTEGER,
    frequency_rnb INTEGER,
    frequency_rap INTEGER,
    frequency_rock INTEGER,
    frequency_video_game_music INTEGER,
    anxiety INTEGER,
    depression INTEGER,
    insomnia INTEGER,
    ocd INTEGER,
    music_effects VARCHAR,
    permissions VARCHAR,
    age_group VARCHAR
);''')

# The cursor and the connection are released in `finally` blocks so that they
# are not leaked when executing or committing the statement fails.
try:
    # Create a cursor on the connection opened in the engine cell
    cur = conn.cursor()
    try:
        # Execute the CREATE TABLE statement
        cur.execute(commands)

        # Commit changes
        conn.commit()
    finally:
        # Close the cursor
        cur.close()
finally:
    # Close communication with the server
    conn.close()

In [None]:
# Write the DataFrame to the 'survey' table.
# if_exists='replace' (used here instead of 'append') drops an existing
# 'survey' table and recreates it from the DataFrame before inserting rows.
# NOTE(review): this means a previously created 'survey' table, including its
# hand-written column types and 'id' primary key, does not survive this call.
df.to_sql(
    name='survey',
    con=engine,
    if_exists='replace',
    index=False,
)