In [1]:
import os
import re
import time

import openpyxl
import pandas as pd
import requests
from perplexipy import PerplexityClient


In [2]:
# Read the API key from the environment (KeyError if PERPLEXITY_API_KEY is unset)
key = os.environ['PERPLEXITY_API_KEY']
# Notebook-level client; fetch_perplexity_data calls client.query on it
client = PerplexityClient(key = key)

In [6]:
def fetch_perplexity_data(university, state, major):
    """Ask the Perplexity client five profile questions about one programme.

    Args:
        university: University name interpolated into the prompt.
        state: State name interpolated into the prompt.
        major: Major / degree name interpolated into the prompt.

    Returns:
        The text returned by ``client.query`` split on newline characters,
        i.e. a list with one entry per line of the reply.
    """
    question = f"""Provide concise answers for {university} in {state} for {major} major:
    1. University ethos
    2. Desired student character traits
    3. Careers offered in this major
    4. University's unique selling point
    5. What matters for student idiosyncrasies"""

    # `client` is the notebook-level PerplexityClient instance.
    answer_text = client.query(question)
    answer_lines = answer_text.split('\n')
    return answer_lines

In [15]:
# Spot-check the raw list of lines returned for a single university/major
fetch_perplexity_data('Boston University', 'Massachusetts', 'BA in Economics')

['Here are the concise answers for Boston University in Massachusetts for a BA in Economics major:',
 '',
 '**1. University ethos:**',
 "Boston University's ethos is centered around academic excellence, innovation, and community engagement. The university values diversity, inclusivity, and social responsibility, fostering a culture of intellectual curiosity and critical thinking.",
 '',
 '**2. Desired student character traits:**',
 'Boston University seeks students who are:',
 '\t* Intellectually curious and passionate about learning',
 '\t* Analytically minded and able to think critically',
 '\t* Effective communicators and collaborators',
 '\t* Open-minded and respectful of diverse perspectives',
 '\t* Motivated to make a positive impact in their communities',
 '',
 '**3. Careers offered in this major:**',
 'A BA in Economics from Boston University can lead to careers in:',
 '\t* Finance and banking',
 '\t* Government and public policy',
 '\t* International trade and development',
 '

In [16]:
# Output column -> heading title (as a regex fragment), in prompt order.
_SECTION_TITLES = (
    ('Ethos', r"university\s+ethos"),
    ('Character_Traits', r"desired\s+student\s+character\s+traits"),
    ('Careers', r"careers\s+offered\s+in\s+this\s+major"),
    ('Unique_Selling_Point', r"university['\u2019]s\s+unique\s+selling\s+point"),
    ('Student_Idiosyncrasies', r"what\s+matters\s+for\s+student\s+idiosyncrasies"),
)

# One compiled heading matcher per column. A heading is "<n>. <title>" (or
# "<n>) <title>"), optionally wrapped in markdown emphasis and followed by a
# colon on either side of the closing emphasis. Matching is case-insensitive so
# small formatting differences in the reply do not leave a column empty.
_SECTION_HEADERS = tuple(
    (
        column,
        re.compile(
            rf"(?<!\d){number}\s*[.)]\s*[*_]*\s*{title}\s*[*_]*\s*:?\s*[*_]*",
            re.IGNORECASE,
        ),
    )
    for number, (column, title) in enumerate(_SECTION_TITLES, start=1)
)


def parse_perplexity_response(results):
    """Group the lines of a Perplexity reply under the five prompt sections.

    Args:
        results: Iterable of reply lines (as returned by
            ``fetch_perplexity_data``). A single string is split into lines
            first; ``None`` is treated as an empty reply.

    Returns:
        dict with keys ``Ethos``, ``Character_Traits``, ``Careers``,
        ``Unique_Selling_Point`` and ``Student_Idiosyncrasies``. Each value is
        the section's non-blank lines, stripped and joined with single spaces,
        or ``''`` when the section is absent. Lines that appear before the
        first recognised heading are ignored.
    """
    if results is None:
        results = []
    elif isinstance(results, str):
        # Iterating a bare string would yield characters, not lines.
        results = results.splitlines()

    collected = {column: [] for column, _ in _SECTION_TITLES}
    current_section = ''

    for line in results:
        header_match = None
        for column, header_pattern in _SECTION_HEADERS:
            header_match = header_pattern.search(line)
            if header_match:
                current_section = column
                break

        if header_match:
            # Keep any answer text that shares the line with its heading,
            # e.g. "**1. University ethos:** Excellence and service."
            text = line[header_match.end():].strip()
        else:
            text = line.strip()

        if text and current_section:
            collected[current_section].append(text)

    return {column: ' '.join(parts) for column, parts in collected.items()}

In [7]:

# def fetch_perplexity_data(university, country, state, major):
#     prompt = f"""Provide concise answers for {university} in {state} for {major} major:
#     1. University ethos
#     2. Desired student character traits
#     3. Careers offered in this major
#     4. University's unique selling point
#     5. What matters for student idiosyncrasies"""
    
#     response = requests.post(
#         "https://api.perplexity.ai/chat/completions",
#         headers={
#             "Authorization": "Bearer YOUR_API_KEY_HERE",
#         },
#         json={
#             "model": "mixtral-8x7b-instruct",
#             "messages": [{"role": "user", "content": prompt}]
#         }
#     )
    
#     if response.status_code == 200:
#         return response.json()['choices'][0]['message']['content'].split('\n')
#     else:
#         return ["Error"] * 5


In [8]:

# Read the applications from the 'in' sheet of the Excel workbook
# df = pd.read_csv('your_input_file.csv')
df = pd.read_excel('Grad 2024 US Applications.xlsx', sheet_name='in')

In [9]:
# Preview the first rows to check which columns were loaded
df.head()

Unnamed: 0,GRADUATION_YEAR,UNIVERSITY_NAME,UNIVERSITY_COUNTRY,STATE,MAJOR
0,2024,Boston University,United States,Massachusetts,BA in Economics
1,2024,University of Connecticut,United States,Connecticut,Economics
2,2024,"University of California, Irvine",United States,California,Business Administration
3,2024,University of Michigan Ann Arbor,United States,Michigan,Not Set
4,2024,University of Virginia,United States,Virginia,Economics


In [10]:

# Add one empty text column per answer section; the processing loop fills them
for new_column in (
    'Ethos',
    'Character_Traits',
    'Careers',
    'Unique_Selling_Point',
    'Student_Idiosyncrasies',
):
    df[new_column] = ''


In [11]:
# Process only the first ROWS_TO_PROCESS rows (trial run on a small sample)
ROWS_TO_PROCESS = 5
rows_to_process = df.head(ROWS_TO_PROCESS)
# head() returns fewer rows when the sheet is short, so report the real total
total_rows = len(rows_to_process)

# Count progress with enumerate rather than `index + 1`: the DataFrame index is
# only used as a label for df.at and need not be a 0-based integer.
for position, (index, row) in enumerate(rows_to_process.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}")

    results = fetch_perplexity_data(
        row['UNIVERSITY_NAME'],
        row['STATE'],
        row['MAJOR']
    )

    parsed_data = parse_perplexity_response(results)

    # Write each parsed section into the matching column of this row
    for column, value in parsed_data.items():
        df.at[index, column] = value

    # Avoid rate limiting
    time.sleep(1)

Processing row 1/5
Processing row 2/5
Processing row 3/5
Processing row 4/5
Processing row 5/5


In [13]:
# Preview the enriched rows.
# NOTE(review): in the saved output the intro sentence sits in Ethos and the
# "**1. University ethos:**" heading sits in Careers. The parser cell has a
# later execution count (16) than the processing loop (11), so this output
# presumably predates the current parser -- restart and run all to confirm.
df.head()

Unnamed: 0,GRADUATION_YEAR,UNIVERSITY_NAME,UNIVERSITY_COUNTRY,STATE,MAJOR,Ethos,Character_Traits,Careers,Unique_Selling_Point,Student_Idiosyncrasies
0,2024,Boston University,United States,Massachusetts,BA in Economics,Here are the concise answers for Boston Univer...,,**1. University ethos:**,Boston University's ethos is centered around a...,
1,2024,University of Connecticut,United States,Connecticut,Economics,Here are the concise answers for the Universit...,,**1. University ethos:**,The University of Connecticut's ethos is cente...,
2,2024,"University of California, Irvine",United States,California,Business Administration,Here are the concise answers for University of...,,**1. University ethos:**,"UCI's ethos is centered around innovation, col...",
3,2024,University of Michigan Ann Arbor,United States,Michigan,Not Set,Here are the concise answers for University of...,,**1. University ethos:**,The University of Michigan Ann Arbor's ethos i...,
4,2024,University of Virginia,United States,Virginia,Economics,Here are the concise answers for University of...,,**1. University ethos:**,The University of Virginia's ethos is rooted i...,


In [None]:
# # Process each row
# for index, row in df.iterrows():
#     print(f"Processing row {index + 1}/{len(df)}")
    
#     results = fetch_perplexity_data(
#         row['UNIVERSITY_NAME'],
#         row['STATE'],
#         row['MAJOR']
#     )
    
#     parsed_data = parse_perplexity_response(results)
    
#     for column, value in parsed_data.items():
#         df.at[index, column] = value
    
#     # Avoid rate limiting
#     time.sleep(1)

In [12]:
# Export the enriched table to an Excel workbook, omitting the row index
df.to_excel('output_file.xlsx', index=False)
print("Processing complete. Results saved to 'output_file.xlsx'")

Processing complete. Results saved to 'output_file.xlsx'


In [None]:

# # Export to CSV
# df.to_csv('output_file.csv', index=False)
# print("Processing complete. Results saved to 'output_file.csv'")