In [2]:
# --------------------------CODE BY MANDI DISHA--------------------------
import psycopg2
import pandas as pd
from psycopg2 import OperationalError

def connect_db():
    """Connect to the PostgreSQL database and return the connection and cursor."""
    try:
        conn = psycopg2.connect(
            host="localhost",
            database="postgres",  # Connected to the 'postgres' database
            user="postgres",
            port="5432",
            password="mandi"
        )
        print("Connection successful!")
        return conn, conn.cursor()
    except OperationalError as e:
        print(f"Connection failed: {e}")
        return None, None

def fetch_movie_data(cur):
    """Run the SQL query and return the results as a Pandas DataFrame."""
    query = """
    SELECT
        m.rating, 
        COUNT(s.title) AS movie_count,
        ROUND(AVG(s.production_budget), 2) AS avg_production_budget,  
        ROUND(AVG(m.userscore), 2) AS avg_userscore,  
        ROUND(AVG(s.worldwide_box_office), 2) AS avg_box_office,
        ROUND((AVG(s.worldwide_box_office) - AVG(s.production_budget)) / AVG(s.production_budget) * 100, 2) AS avg_profit_percentage
    FROM
        sales_v20 s
    JOIN
        movies m
    ON
        s.title = m.title
    AND
        s.release_year = EXTRACT(YEAR FROM m.reldate)
    WHERE
        s.production_budget IS NOT NULL
    AND
        m.userscore IS NOT NULL 
    GROUP BY
        m.rating  
    ORDER BY
        avg_production_budget DESC, 
        avg_userscore DESC,  
        avg_box_office DESC;
    """
    cur.execute(query)
    # Fetch all rows from the query
    rows = cur.fetchall()
    
    # Get column names
    colnames = [desc[0] for desc in cur.description]

    # Return the data as a pandas DataFrame
    return pd.DataFrame(rows, columns=colnames)

def close_db(conn, cur):
    """Close the database connection and cursor."""
    cur.close()
    conn.close()

# Main function to encapsulate everything
def main():
    conn, cur = connect_db()
    if conn and cur:
        movie_data = fetch_movie_data(cur)
        print("Data from the query:")
        print(movie_data.head())  # Display the first few rows

        # Perform some analysis
        # Example: Describe the data
        print("\nSummary statistics:")
        print(movie_data.describe())

        # Example: Sort by average profit percentage
        sorted_data = movie_data.sort_values(by="avg_profit_percentage", ascending=False)
        print("\nTop ratings by profit percentage:")
        print(sorted_data.head())

        # Close the database connection
        close_db(conn, cur)

if __name__ == "__main__":
    main()


#  [END CODE BY MANDI DISHA]

Connection successful!
Data from the query:
    rating  movie_count avg_production_budget avg_userscore avg_box_office  \
0     TV-G            4           91500000.00          7.03   473407170.25   
1        G           48           70885416.67          6.73   245814300.02   
2       PG          428           64224567.76          6.43   201635396.44   
3    PG-13         1122           57289148.87          6.46   171838831.59   
4  Unrated           65           37464924.34          6.48   106835608.08   

  avg_profit_percentage  
0                417.38  
1                246.78  
2                213.95  
3                199.95  
4                185.16  

Summary statistics:
       movie_count
count    14.000000
mean    221.642857
std     426.563659
min       1.000000
25%       6.500000
50%      25.000000
75%      82.250000
max    1263.000000

Top ratings by profit percentage:
   rating  movie_count avg_production_budget avg_userscore avg_box_office  \
13     NR            1     