## Team Olympia - Aidan Bradshaw, David Li, Akylai Batyrbekova

`Institutions`

The Institutions table is the main entity of the schema. It stores core attributes of each institution that rarely change, such as its name, control type, accrediting agency, and degree levels. UNITID is the primary key, so each institution has exactly one row and a unique identifier.

`Location`

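The Location table stores each institution's address and regional attributes (region, FIPS code, street address, city, state abbreviation, and ZIP code). We avoid redundancy by keeping this data in its own table, linked to Institutions through the UNITID foreign key, which also serves as the table's primary key.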

`Financial_Data`

This table tracks annual financial metrics for each institution, with one row per institution per year and column names that follow the source data's naming conventions. It stores tuition, tuition revenue, faculty salary, and loan default metrics over time, which will feed the dashboard planned for a later stage.

`Admissions_Data`

This table lets users retrieve admission statistics on a yearly basis for analysis and comparison. It maintains a history of admission rates, SAT/ACT math midpoint scores, and median graduate debt.

`Fields_of_Study`

This table tracks data on the fields of study offered by each institution per year. It stores program-specific information, allowing users to see trends by field and degree types.

`IPEDS_Directory`

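This is a centralized table for directory and census data, especially location data. For each institution and year it stores census statistical-area codes (CBSA, CSA), Carnegie classification codes, and latitude/longitude, so that these values live in one place and redundancy is minimized.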

In [None]:
-- Institutions: parent table holding one row per institution.
-- Only attributes that are not tracked per year live here; the other tables
-- in this schema point back to it through their UNITID foreign keys.
CREATE TABLE Institutions (
    UNITID       INT,             -- Unique institution ID from College Scorecard/IPEDS
    OPEID        VARCHAR(10),     -- Identifier connecting Scorecard and IPEDS
    INSTNM       VARCHAR(255),    -- Institution name (column was previously INSTITUTION_NAME)
    CONTROL      VARCHAR(50),     -- Type of institution: Public, Private
    ACCREDAGENCY VARCHAR(255),    -- Accrediting agency name
    PREDDEG      VARCHAR(50),     -- Most common degree (previously in Annual_Institution_Data)
    HIGHDEG      VARCHAR(50),     -- Highest degree offered (previously in Annual_Institution_Data)
    PRIMARY KEY (UNITID)          -- One row per institution; UNITID is the lookup key for child tables
);

-- Location: address and regional attributes, at most one row per institution.
CREATE TABLE Location (
    -- UNITID is both this table's primary key and its link to Institutions.
    -- Updates to the parent key and deletions of the parent row cascade here.
    UNITID  INT PRIMARY KEY
            REFERENCES Institutions(UNITID)
            ON UPDATE CASCADE
            ON DELETE CASCADE,
    REGION  VARCHAR(50),     -- Region the institution belongs to
    ST_FIPS VARCHAR(10),     -- FIPS code. NOTE(review): the ST_ prefix indicates a state-level code,
                             -- not the county-level code an earlier comment claimed; confirm against the data dictionary
    ADDR    VARCHAR(255),    -- Street address
    CITY    VARCHAR(50),     -- City
    STABBR  VARCHAR(2),      -- Two-character state abbreviation
    ZIP     VARCHAR(10)      -- ZIP code
);

-- Financial_Data: yearly financial metrics, one row per (YEAR, UNITID).
CREATE TABLE Financial_Data (
    YEAR            INT,               -- Year of the data entry
    UNITID          INT
                    REFERENCES Institutions(UNITID)
                    ON UPDATE CASCADE
                    ON DELETE CASCADE,  -- Foreign key to Institutions; rows follow the parent on update/delete
    TUITIONFEE_IN   DECIMAL(10, 2),    -- In-state tuition fees
    TUITIONFEE_OUT  DECIMAL(10, 2),    -- Out-of-state tuition fees
    TUITIONFEE_PROG DECIMAL(10, 2),    -- Program-specific tuition fees
    TUITFTE         DECIMAL(10, 2),    -- Tuition revenue per full-time equivalent student
    AVGFACSAL       DECIMAL(10, 2),    -- Average faculty salary. NOTE(review): previously described as annual;
                                       -- the Scorecard documentation appears to define it per month; confirm
    -- Default rates are widened from scale 2 to scale 4. If the source reports them
    -- as fractions between 0 and 1 (as the College Scorecard does; TODO confirm for
    -- this load), a scale of 2 would round 0.0756 to 0.08. DECIMAL(7, 4) still
    -- accepts every value the previous DECIMAL(5, 2) did.
    CDR2            DECIMAL(7, 4),     -- Two-year loan default rate
    CDR3            DECIMAL(7, 4),     -- Three-year loan default rate
    PRIMARY KEY (YEAR, UNITID)         -- Composite primary key: one row per institution per year
);

-- Admissions_Data: yearly admission statistics, one row per (YEAR, UNITID).
CREATE TABLE Admissions_Data (
    YEAR          INT,             -- Year of the data entry
    UNITID        INT
                  REFERENCES Institutions(UNITID)
                  ON UPDATE CASCADE
                  ON DELETE CASCADE, -- Foreign key to Institutions; rows follow the parent on update/delete
    -- Admission rate is widened from scale 2 to scale 4. If the source reports it as a
    -- fraction between 0 and 1 (as the College Scorecard does; TODO confirm for this
    -- load), a scale of 2 would round 0.6538 to 0.65. DECIMAL(7, 4) still accepts
    -- every value the previous DECIMAL(5, 2) did.
    ADM_RATE      DECIMAL(7, 4),   -- Admission rate
    GRAD_DEBT_MDN INT,             -- Median graduate debt
    SATMTMID      INT,             -- SAT math mid-score
    ACTMTMID      INT,             -- ACT math mid-score
    PRIMARY KEY (YEAR, UNITID)     -- Composite primary key: one row per institution per year
);

-- IPEDS_Directory: per-year directory record for each institution, holding
-- statistical-area codes, Carnegie classification codes, and coordinates.
CREATE TABLE IPEDS_Directory (
    UNITID    INT
              REFERENCES Institutions(UNITID)
              ON UPDATE CASCADE
              ON DELETE CASCADE,   -- Foreign key to Institutions; rows follow the parent on update/delete
    YEAR      INT,                 -- Year of the IPEDS data entry

    -- Census statistical areas
    CBSA      VARCHAR(10),         -- Core-Based Statistical Area code
    CBSATYPE  VARCHAR(50),         -- CBSA classification type
    CSA       VARCHAR(10),         -- Combined Statistical Area code

    -- Carnegie classification variables
    CCBASIC   VARCHAR(50),         -- Carnegie Classification
    CCUGPROF  VARCHAR(50),         -- Carnegie Undergraduate Profile
    CCSIZSET  VARCHAR(50),         -- Carnegie Size and Setting variable
    CCIPUG    VARCHAR(50),         -- Carnegie Undergraduate Instructional Program
    CCIPGRD   VARCHAR(50),         -- Carnegie Graduate Instructional Program
    CCENPROF  VARCHAR(50),         -- Carnegie Enrollment Profile

    -- Coordinates, stored with six decimal places
    LATITUDE  DECIMAL(9, 6),
    LONGITUD  DECIMAL(9, 6),       -- Longitude; the column name is spelled without the trailing E

    PRIMARY KEY (YEAR, UNITID)     -- Composite primary key to allow annual updates
);


SyntaxError: invalid syntax (351014573.py, line 1)