In [10]:
from boxsdk import JWTAuth, Client
import io
import pandas as pd
import psycopg2

# test

# Authentication and Client Setup
def authenticate_box():
    config = JWTAuth.from_settings_file("Token/BOX_TOKEN.json")
    client = Client(config)
    user = client.user().get()
    print(f"Authenticated as {user.name}")
    return client

# Search for all CSV files in the folder
def find_csv_files(client, folder_id='287803137437'):
    csv_files = []
    items = client.folder(folder_id).get_items(limit=100)
    for item in items:
        if item.type == 'file' and item.name.endswith('.csv'):
            print(f'CSV File Found: {item.name}')
            csv_files.append(item)
    if not csv_files:
        print("No CSV files found.")
    return csv_files
    
# Download and load file into a pandas DataFrame
def download_csv_file(file_item):
    if file_item:
        file_stream = io.BytesIO()
        file_item.download_to(file_stream)
        file_stream.seek(0)
        df = pd.read_csv(file_stream)
        return df
    return None

# Check if table exists in PostgreSQL
def check_table_exists(cursor, schema, table):
    cursor.execute(f"SELECT * FROM information_schema.tables WHERE table_schema = '{schema}' AND table_name = '{table}';")
    return cursor.fetchall()

# Append DataFrame to PostgreSQL table using INSERT statements
def append_data_to_db(df, schema, table, db_params):
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()

        if check_table_exists(cursor, schema, table):
            print(f"Table {schema}.{table} exists.")
        else:
            print(f"Table {schema}.{table} does not exist.")
            return False  # Exit early if the table doesn't exist

        # Generate the column names from the DataFrame
        columns = ', '.join(df.columns)
        
        # Iterate over the DataFrame and insert row by row
        for i, row in df.iterrows():
            values = ', '.join([f"'{x}'" if isinstance(x, str) else str(x) for x in row])
            insert_query = f"INSERT INTO {schema}.{table} ({columns}) VALUES ({values});"
            cursor.execute(insert_query)

        # Commit the transaction
        conn.commit()
        print(f"Data appended successfully to {schema}.{table}")
        return True  # Indicate success

    except Exception as e:
        print(f"An error occurred: {e}")
        return False  # Indicate failure

    finally:
        cursor.close()
        conn.close()

# Move file to 'imported' subfolder on Box
def move_file_to_imported(client, file_item):
    imported_folder_id = '287805162509'  # ID of the 'imported' subfolder
    try:
        # Move the file to the imported folder
        imported_folder = client.folder(imported_folder_id)
        file_item.move(imported_folder)
        print(f"File '{file_item.name}' moved to 'imported' folder.")
    except Exception as e:
        print(f"Failed to move file: {e}")

# Main execution
if __name__ == "__main__":
    client = authenticate_box()
    csat_file = find_csv_files(client)
    df = download_csv_file(csat_file)

    if df is not None:
        db_params = {
            'host': 'localhost',
            'port': 5432,
            'dbname': 'mbta_dw',
            'user': 'opmi_etl',
            'password': 'postgres'  # Replace with the actual password
        }

    for file_item in csv_files:  # Iterate through each found CSV file
        df = download_csv_file(file_item)  # Download each file

        if df is not None:
            # Extract schema and table name from the file name
            file_name = os.path.basename(file_item.name)  # Get the base file name
            schema, table = file_name[:-4].split('_')  # Split on '_'
            
            # Append data to the database
            if append_data_to_db(df, schema, table, db_params):
                move_file_to_imported(client, file_item)  # Move the file if successful
            else:
                print("Database insert failed; file will not be moved.")

Authenticated as BoxR
CSV File Found: odx2.fare_action.csv
CSV File Found: surveys.csat.csv


AttributeError: 'list' object has no attribute 'download_to'

## SQL table creation

In [73]:
import psycopg2

# Database connection parameters
db_params = {
    'host': 'localhost',
    'port': 5432,
    'dbname': 'mbta_dw',
    'user': 'opmi_etl',
    'password': 'postgres'  # Replace with the actual password
}

try:
    # Connect to the PostgreSQL database
    conn = psycopg2.connect(**db_params)
    cursor = conn.cursor()

    # Create 'csat' table in 'surveys' schema
    create_table_query = sql.SQL("""
        CREATE TABLE IF NOT EXISTS surveys.csat (
                survey_date DATE,
                survey_name VARCHAR(255),
                question_description TEXT,
                response_total INTEGER,
                response_1_text VARCHAR(255),
                response_1_percent FLOAT,
                response_2_text VARCHAR(255),
                response_2_percent FLOAT,
                response_3_text VARCHAR(255),
                response_3_percent FLOAT,
                response_4_text VARCHAR(255),
                response_4_percent FLOAT,
                response_5_text VARCHAR(255),
                response_5_percent FLOAT,
                response_6_text VARCHAR(255),
                response_6_percent FLOAT,
                response_7_text VARCHAR(255),
                response_7_percent FLOAT
        )
    """)

    # Execute the create table query
    cursor.execute(create_table_query)

    # Commit the changes
    conn.commit()

    print("Table 'csat' created successfully in 'surveys' schema.")

except Exception as e:
    print("Error while connecting to PostgreSQL or creating table:", e)

finally:
    # Close the database connection
    if cursor:
        cursor.close()
    if conn:
        conn.close()

Table 'csat' created successfully in 'surveys' schema.
