# US Institutions Financial Report

In [18]:
# May need to install pandas directly in the Jupyter notebook with the following commands
#! pip3 install --user pandas
#! pip3 install --user psycopg
import matplotlib.pyplot as plt
import pandas as pd
import psycopg

from credentials import DBNAME, HOST, USERNAME, PASSWORD

In [2]:
# Notebook parameters
YEAR: int = 2020  # reporting year used by the cells below


In [3]:
# Load the institutions rows from PostgreSQL into the DataFrame `df`.

# YEAR is bound server-side through the %s placeholder; it is never formatted
# into the SQL text.
# NOTE(review): YEAR is a bare year while the format mask is 'YYYY-MM-DD';
# confirm that to_date() resolves this to the intended cut-off date.
select_cmd = """
    SELECT * FROM institutions 
    WHERE extracted_year <= to_date(%s::text ,'YYYY-MM-DD');
"""

# Connect to the database using parameters from credentials.py.
# The `with` blocks close the cursor and the connection on exit, including
# when the query raises, so a failed run does not leave a session open.
with psycopg.connect(
        dbname=DBNAME,
        host=HOST,
        user=USERNAME,
        password=PASSWORD) as conn:
    with conn.cursor() as cur:
        cur.execute(select_cmd, (YEAR,))

        # Take the column names from the cursor metadata; building the frame
        # from the row tuples alone would label the columns 0, 1, 2, ...
        column_names = [col.name for col in cur.description]

        # Save to a dataframe (an empty result still gets the right columns)
        df = pd.DataFrame(cur.fetchall(), columns=column_names)

# Data Summary

# Summary of Current College Tuition Rates

# Best and Worst Performing Institutions by Loan-Repayment Rates

# Tuition and Loan Repayment Rates over Time

How have tuition rates and loan repayment rates (measured here by the three-year default rate) changed over time for the top 10 most expensive universities in the U.S.?
We will explore this by first finding the top 10 most expensive schools.

In [None]:
-- First find the top 10 most expensive schools.
-- "Most expensive" is ranked by the largest of these variables:
-- in_state_tuit, out_state_tuit, prog_year_tuit
-- Also select the 3-year default rate (used as the loan repayment measure) and the year.
-- GREATEST() is NULL only when all three tuition columns are NULL, and
-- PostgreSQL places NULLs first in a DESC sort, so NULLS LAST keeps rows
-- without any tuition data out of the top 10.
-- NOTE(review): if institutions holds one row per school per extracted_year,
-- the same school can occupy several of the 10 rows; confirm whether the
-- ranking should be per school instead of per row.
SELECT name, in_state_tuit, out_state_tuit, prog_year_tuit, three_yr_default, extracted_year
FROM institutions
ORDER BY GREATEST(in_state_tuit, out_state_tuit, prog_year_tuit) DESC NULLS LAST
LIMIT 10;

Next, graph these schools based on their tuition and loan repayment rates.

In [None]:
# Placeholder rows standing in for the top-10 query output; replace them with
# the real query result once it is available.
# The literal `...` entries were dropped: inside a list they are Ellipsis
# objects that become cell values and make pd.to_datetime() below raise.
results = pd.DataFrame({
    'name': ['University A', 'University B', 'University C'],
    'in_state_tuit': [10000, 12000, 15000],
    'out_state_tuit': [20000, 22000, 25000],
    'prog_year_tuit': [18000, 20000, 22000],
    'three_yr_default': [0.05, 0.03, 0.02],
    'extracted_year': ['2022-01-01', '2022-01-01', '2022-01-01'],
})

# Parse the date strings into a datetime column
results['extracted_year'] = pd.to_datetime(results['extracted_year'])

# Explicit figure/axes handles instead of the implicit pyplot state
fig, ax = plt.subplots(figsize=(10, 6))

# One scatter call per row so that every university gets its own legend entry
for school, tuition, default_rate in zip(
        results['name'], results['in_state_tuit'], results['three_yr_default']):
    ax.scatter(tuition, default_rate, label=school)

# NOTE(review): the title mentions "Loan Repayment Rate Over Time", but the
# y-axis is the three-year default rate and extracted_year is not plotted;
# confirm the intended wording or add the time dimension.
ax.set_title('Tuition vs. Loan Repayment Rate Over Time for Top 10 Most Expensive Universities')
ax.set_xlabel('In-State Tuition')
ax.set_ylabel('Three-Year Default Rate')
ax.legend()
ax.grid(True)

plt.show()

# SAT Scores and College Admission Rate

# Faculty Salary and Revenue Tuition