In [22]:
import pandas as pd
import requests
import csv


# Load the raw datasets.
aid = pd.read_csv("DataSet Financial Aid.csv")
school_info = pd.read_csv("DataSet School Information.csv")
admissions = pd.read_csv("DataSet Admissions.csv")
expenses = pd.read_csv("DataSet Student Expenses.csv")

# Keep only the schools that also appear in the admissions dataset, then
# renumber the index so that positions run 0..n-1 again.
in_admissions = school_info.schoolname.isin(admissions.schoolname)
school_info = school_info[in_admissions].reset_index(drop=True)

# Re-order the admissions rows to follow school_info by school name, so that
# position i refers to the same school in both frames. Without this the
# per-school values below were paired purely by row position, which silently
# mixes up schools whenever the two files differ in order or row count.
admissions_aligned = (
    admissions.drop_duplicates(subset="schoolname")
    .set_index("schoolname")
    .loc[school_info.schoolname]
    .reset_index()
)

# Information that will be held about each school
lsat = admissions_aligned.LSAT50
gpa = admissions_aligned.UGGPA50
name = school_info.schoolname
acc_rate = admissions_aligned.NumOffers / admissions_aligned.NumApps
location = school_info.SchoolState
# NOTE(review): the expenses rows are still matched by row position because
# no school-name column is visible for that dataset here; confirm that the
# file is in the same order as school_info, or align it by name as well.
tuition = [expenses.FTResTuition, expenses.FTNonResTuition]
size = admissions_aligned.TotalFirstYear
group = school_info.SchoolType
description = ""

# Dictionary that keeps the info about each school, keyed by school name.
schools = {}

# Record layout: [name, group, state, LSAT, GPA, acceptance rate (%),
# size, tuition, (out-of-state tuition, public schools only), description].
for i, n in enumerate(name):
    record = [
        n,
        group[i],
        location[i],
        lsat[i],
        gpa[i],
        round(acc_rate[i] * 100),  # fraction of offers -> whole percent
        size[i] * 3,  # presumably first-year class x 3 years as total size; TODO confirm
        tuition[0][i],
    ]
    # Only public schools get the extra out-of-state tuition field.
    if group[i] == 'PUB':
        record.append(tuition[1][i])
    record.append(description)
    schools[n] = record

{'Faulkner University': ['Faulkner University', 'PRI', 'AL', np.int64(150), np.float64(3.21), 58, np.int64(357), np.int64(40400), ''], 'Samford University': ['Samford University', 'PRI', 'AL', np.int64(155), np.float64(3.61), 56, np.int64(456), np.int64(45236), ''], 'University of Alabama': ['University of Alabama', 'PUB', 'AL', np.int64(167), np.float64(3.95), 27, np.int64(378), np.int64(24917), np.int64(46407), ''], 'Arizona State University': ['Arizona State University', 'PUB', 'AZ', np.int64(167), np.float64(3.9), 21, np.int64(597), np.int64(28839), np.int64(50317), ''], 'University of Arizona': ['University of Arizona', 'PUB', 'AZ', np.int64(163), np.float64(3.79), 29, np.int64(357), np.int64(26200), np.int64(30700), ''], 'University of Arkansas-Little Rock': ['University of Arkansas-Little Rock', 'PUB', 'AR', np.int64(152), np.float64(3.39), 59, np.int64(378), np.int64(16726), np.int64(32686), ''], 'University of Arkansas-Fayetteville': ['University of Arkansas-Fayetteville', 'PU

In [25]:
# function that returns name of school that the user has a high chance of getting into based on inputs
def match(loc, lsat, gpa):
    """Return the names of schools in ``schools`` the applicant is likely to get into.

    Args:
        loc: Location value compared for equality with each record's
            location field (index 2).
        lsat: Applicant's LSAT score.
        gpa: Applicant's GPA.

    Returns:
        A list of school names (keys of ``schools``), in dictionary order,
        whose location equals ``loc``, whose LSAT field (index 3) is at least
        2 points below ``lsat``, and whose GPA field (index 4) is at least
        0.05 below the applicant's effective GPA.
    """
    matches = []
    for key, value in schools.items():
        # An LSAT more than 2 points above the school's figure earns a 0.3
        # GPA boost. The effective GPA is recomputed for every school so it
        # is never undefined and never carried over from a previous school.
        temp_gpa = gpa + .3 if lsat > value[3] + 2 else gpa

        if (
            value[2] == loc
            and value[3] <= lsat - 2
            and value[4] <= temp_gpa - .05
        ):
            matches.append(key)
    return matches

# Example query: location "TX", LSAT 170, GPA 4
match("TX", 170, 4)

['Baylor University',
 'Southern Methodist University',
 "St. Mary's University",
 'Texas A&M University',
 'Texas Southern University',
 'Texas Tech University',
 'University of Houston',
 'South Texas College of Law',
 'UNT Dallas College Of Law']

In [None]:
# Client for the Groq chat-completions API
import os

from groq import Groq

# The API key is read from the GROQ_API_KEY environment variable. Never paste
# the key itself into the notebook: the literal key that used to be here was
# being passed as the *name* of an environment variable (so the lookup
# returned None) and should be revoked since it has been exposed.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Gets short description of school based on chat query
for school in schools:
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "user",
                # Spaces around the school name keep the prompt readable
                # ("description of X law school", not "ofXlaw school").
                "content": "give me a very short description of " + school + " law school. Make sure to just give me the content. No extra \"here is your description\""
            },
        ],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )

    # The response is streamed; join the chunk texts into one string
    # (a chunk's content may be None, which is treated as empty).
    school_description = "".join(
        chunk.choices[0].delta.content or "" for chunk in completion
    )

    # The description is always the last field of a record, whether or not
    # the record carries the extra out-of-state tuition field.
    schools[school][-1] = school_description

In [6]:
# headers of CSV file
headers = [
    'Name', 'Group', 'Location', 'LSAT', 'GPA', 'Acceptance Rate (%)',
    'Size', 'Tuition', 'OOS Tuition (if PUB)', 'Description']

# Creates a CSV file of school information dictionary
with open('schools2.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(headers)

    # Iterate over the records only, so the loop does not overwrite the
    # notebook-level `name` variable.
    for data in schools.values():
        # Work on a copy so that exporting never modifies `schools` itself.
        row = list(data)

        # Records without the out-of-state tuition field are one column
        # short; pad that column with "" so every row matches the headers.
        if len(row) < len(headers):
            row.insert(8, '')
        writer.writerow(row)

['UNT Dallas College Of Law', 'PUB', 'Dallas, TX', np.int64(153), np.float64(3.43), np.float64(30.06), np.int64(137), np.int64(18750), np.int64(31650), '']


In [None]:
import sqlite3

# File paths
# The CSV export in this notebook writes 'schools2.csv', so read that same
# file back (the previous value, 'schools.csv', is never produced here).
csv_file = 'schools2.csv'
db_file = 'schools2.db'    # Name of the SQLite database file

# Opens (and creates if missing) the database file
conn = sqlite3.connect(db_file)
try:
    cursor = conn.cursor()

    # Creates the table on the first run only, so that re-running this cell
    # does not fail with "table schools already exists".
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS schools (
        name TEXT PRIMARY KEY,
        group_type TEXT,
        location TEXT,
        lsat INTEGER,
        gpa REAL,
        acceptance_rate REAL,
        size INTEGER,
        tuition INTEGER,
        oos_tuition INTEGER,
        description TEXT
    )
    ''')

    # Open and read the CSV file
    with open(csv_file, mode='r', encoding='utf-8') as file:
        csv_reader = csv.reader(file)
        next(csv_reader, None)  # Skip the header row (tolerates an empty file)
        rows = list(csv_reader)

    # Insert each row into the database. OR REPLACE overwrites a school that
    # is already stored instead of failing on the primary key, which keeps
    # the cell re-runnable.
    cursor.executemany('''
    INSERT OR REPLACE INTO schools (name, group_type, location, lsat, gpa, acceptance_rate, size, tuition, oos_tuition, description)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', rows)

    # Commit the changes
    conn.commit()
finally:
    # Always release the connection, even if reading or inserting failed.
    conn.close()