# Import necessary libraries

In [1]:
import psycopg2
import pandas as pd
import configparser
import warnings
warnings.filterwarnings("ignore")


# Get Connection Details and Establish Connection

In [2]:
# Read configuration file
config = configparser.ConfigParser()
config.read('config.ini')

# Redshift connection parameters from config file
host = config['redshift']['host']
port = config['redshift']['port']
dbname = config['redshift']['dbname']
user = config['redshift']['user']
password = config['redshift']['password']

query = "SELECT * FROM journeys LIMIT 10;"
# Establish connection
try:
    conn = psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password,
        connect_timeout=10 
    )
    print("Connection successful")

    # Load query results into a DataFrame
    df = pd.read_sql_query(query, conn)
    print("Query executed successfully!")

    # Display the DataFrame
    print(df)
except Exception as e:
    print(f"Error: {e}")

Connection successful
Query executed successfully!
   month  year  days report_date       journey_type  journeys_millions
0      1  2010    31  2010-06-30  Underground & DLR          96.836391
1      2  2010    28  2010-06-30  Underground & DLR          90.330504
2      3  2010    31  2010-06-30  Underground & DLR          90.038014
3      4  2010    30  2010-09-30  Underground & DLR          92.544093
4      5  2010    31  2010-09-30  Underground & DLR          88.662911
5      6  2010    30  2010-09-30  Underground & DLR          86.375702
6      7  2010    31  2010-12-31  Underground & DLR          94.986035
7      8  2010    31  2010-12-31  Underground & DLR          97.024794
8      9  2010    30  2010-12-31  Underground & DLR          97.029640
9     10  2010    31  2011-03-31  Underground & DLR          77.369979


# Queries to answer some business questions

In [6]:
# Queries
most_popular_types_query = """
SELECT journey_type,
    SUM(journeys_millions)/1000000 as total_journeys_millions
FROM journeys
GROUP BY journey_type
ORDER BY total_journeys_millions DESC;
"""

emirates_query = """
SELECT month, 
	year, 
	ROUND(journeys_millions,2) AS rounded_journeys_millions
FROM journeys
WHERE journey_type = 'Emirates Airline' AND journeys_millions IS NOT NULL
ORDER BY rounded_journeys_millions DESC
LIMIT 5;
"""

underground_query = """
SELECT year,
	journey_type,
	SUM(journeys_millions) as total_journeys_millions
FROM journeys
WHERE journey_type LIKE '%Underground%'
GROUP BY year, journey_type
ORDER BY total_journeys_millions
LIMIT 5;
"""

try:
    # Establish connection
    conn = psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password,
        connect_timeout=10 
    )
    print("Connection successful")

    # Query 1: Most popular transport types
    print("\nExecuting Query 1: Most popular transport types")
    df1 = pd.read_sql_query(most_popular_types_query, conn)
    print("Query 1 Results:")
    print(df1)

    # Query 2: Emirates Airline popularity by month and year
    print("\nExecuting Query 2: Emirates Airline popularity by month and year")
    df2 = pd.read_sql_query(emirates_query, conn)
    print("Query 2 Results:")
    print(df2)

    # Query 3: Least popular years for Underground & DLR
    print("\nExecuting Query 3: Least popular years for Underground & DLR")
    df3 = pd.read_sql_query(underground_query, conn)
    print("Query 3 Results:")
    print(df3)

    # Close the connection
    conn.close()
    print("\nConnection closed.")

except Exception as e:
    print(f"Error: {e}")

Connection successful

Executing Query 1: Most popular transport types
Query 1 Results:
        journey_type  total_journeys_millions
0                Bus                 0.024905
1  Underground & DLR                 0.015020
2         Overground                 0.001667
3           TfL Rail                 0.000411
4               Tram                 0.000315
5   Emirates Airline                 0.000015

Executing Query 2: Emirates Airline popularity by month and year
Query 2 Results:
   month  year  rounded_journeys_millions
0      5  2012                       0.53
1      6  2012                       0.38
2      4  2012                       0.24
3      5  2021                       0.19
4      5  2013                       0.19

Executing Query 3: Least popular years for Underground & DLR
Query 3 Results:
   year       journey_type  total_journeys_millions
0  2020  Underground & DLR               310.179316
1  2021  Underground & DLR               748.452544
2  2022  Underground