## Data Classification
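Classifies each club/organization funding record into a category based on keywords in its name, followed by manual overrides for names the keywords miss.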

In [1]:
import re

import pandas as pd

In [2]:
# Load the raw funding records; one row per funding allocation.
# NOTE(review): the 'Unnamed: 0' column in the output looks like a saved
# index from an earlier to_csv call -- confirm whether it should be kept.
df = pd.read_csv(filepath_or_buffer='org-funding-data.csv')
df

Unnamed: 0,Club/Org Name,Funding,Date
0,Girl Up,900,01.06.2025
1,Girl Up,550,01.06.2025
2,Model United Nations,2255,01.06.2025
3,Model United Nations,5444.24,01.06.2025
4,Muslim Student Association,265,01.06.2025
...,...,...,...
292,Horizons Incentive Program,150,11.25.2024
293,Chemistry Club,1939.44,11.25.2024
294,Residential & Community Living,2500,11.25.2024
295,Alpha Kappa Psi,2739.40,11.25.2024


In [3]:
# Category -> list of substrings that identify it in a club/org name.
#
# ORDER MATTERS: get_category() walks this dict top to bottom and returns the
# first category that has any keyword contained in the name, so an earlier
# category always wins over a later one.
#
# "Professional - Finance/Business" is deliberately listed before
# "Sustainability/Environment": the keyword "eco" is a substring of
# "economics", so with the opposite order the "economics" keyword could never
# be reached and every economics club was labelled Sustainability/Environment.
club_keywords = {

    "Professional - Finance/Business": ["sales", "consulting", "finance", "investment", "economics", "business", "marketing"],
    "Sustainability/Environment": ["environment", "climate", "eco", "green", "sustain"],
    "Professional - STEM": ["developer", "hack", "physics", "chemistry", "chemist", "machine", "biology", "math", "racing", "science", "robot", "data", "research", "rocket", "compute", "engineer"],
    "Professional - Civic" : ["court", "trial", "model united nations", "legal", "law", "debate"],
    "Religious": ["muslim", "christian", "catholic", "hindu", "buddhism", "chabad"],
    "Cultural": ["student union", "students association", "student association"],
    "Arts/Performance": ["media", "vocal", "dance", "theatre", "music", "a cappella", "band", "drama", "film", "art", "A Capella"],
    "Politics/Activism": ["democrat", "republican", "activism", "policy", "justice", "vote", "civic"],
    "Fraternity/Sorority": ["fraternity", "sorority", "alpha", "beta", "gamma", "sigma", "kappa", "omega", "phi", "theta", "delta"],
    # "gym" was listed twice in the original; the duplicate had no effect and is dropped.
    "Sports/Fitness": ["weightlifting", "swim", "running", "cheer", "gym", "ski", "gains", "meditation", "soccer", "basketball", "Badminton", "tennis", "baseball", "sports", "fitness", "hiking", "recreation", "men's", "women's"],
    "Associated Students": ["AS ", "Associated Students"],
    "Professional - Healthcare" : ["doctor", "physician", "nurse", "health", "medical"],
    "UCSB Programs" : ["office", "center", "program"],
    # Suffix patterns such as "Anthropology Club": a word ending in "y"
    # followed by a generic organization noun.
    "Academic" : ["y association", "y group", "y society", "y club"]

}

def get_category(name, keyword_map=None):
    """Return the first category whose keyword occurs in ``name``.

    Matching is a case-insensitive substring test. Categories are tried in
    the mapping's iteration order and the first hit wins, so earlier
    categories take priority over later ones.

    Parameters
    ----------
    name : str
        Club/organization name to classify. Any non-string value (for
        example the float NaN pandas produces for an empty cell) is treated
        as having no match instead of raising ``AttributeError``.
    keyword_map : dict[str, list[str]], optional
        Category -> keywords table. Defaults to the module-level
        ``club_keywords``.

    Returns
    -------
    str
        The matching category name, or ``"Uncategorized"`` when nothing
        matches.
    """
    if keyword_map is None:
        keyword_map = club_keywords

    # Missing names arrive as NaN (a float), which has no .lower().
    if not isinstance(name, str):
        return "Uncategorized"

    # Lower-case the name once rather than once per keyword.
    lowered = name.lower()
    for category, keywords in keyword_map.items():
        if any(keyword.lower() in lowered for keyword in keywords):
            return category

    return "Uncategorized"

# Lift the row-display limit so every record can be inspected in the output.
pd.set_option('display.max_rows', None)

# Label each record by running its name through the keyword classifier.
df['Category'] = df['Club/Org Name'].apply(get_category)

df


Unnamed: 0,Club/Org Name,Funding,Date,Category
0,Girl Up,900.0,01.06.2025,Uncategorized
1,Girl Up,550.0,01.06.2025,Uncategorized
2,Model United Nations,2255.0,01.06.2025,Professional - Civic
3,Model United Nations,5444.24,01.06.2025,Professional - Civic
4,Muslim Student Association,265.0,01.06.2025,Religious
5,UCSBreakin',1000.0,01.06.2025,Uncategorized
6,Association for Computing Machinery,150.0,01.06.2025,Professional - STEM
7,Men's Rugby,16071.0,01.06.2025,Sports/Fitness
8,REALITY,345.07,01.06.2025,Uncategorized
9,Gaucho Pep Band,9474.0,01.06.2025,Arts/Performance


In [4]:
# Records the keyword pass managed to label.
catted = df.loc[df['Category'].ne('Uncategorized')]
catted

Unnamed: 0,Club/Org Name,Funding,Date,Category
2,Model United Nations,2255.0,01.06.2025,Professional - Civic
3,Model United Nations,5444.24,01.06.2025,Professional - Civic
4,Muslim Student Association,265.0,01.06.2025,Religious
6,Association for Computing Machinery,150.0,01.06.2025,Professional - STEM
7,Men's Rugby,16071.0,01.06.2025,Sports/Fitness
9,Gaucho Pep Band,9474.0,01.06.2025,Arts/Performance
11,Badminton,2000.0,01.13.2025,Sports/Fitness
12,Ski and Snowboarding,7685.0,01.13.2025,Sports/Fitness
13,Anthropology Club,200.0,01.13.2025,Academic
14,Data Science,2500.0,01.13.2025,Professional - STEM


In [5]:
# Records the keyword pass could not label; these need manual overrides.
uncategorized = df.loc[df['Category'].eq('Uncategorized')]
uncategorized

Unnamed: 0,Club/Org Name,Funding,Date,Category
0,Girl Up,900.0,01.06.2025,Uncategorized
1,Girl Up,550.0,01.06.2025,Uncategorized
5,UCSBreakin',1000.0,01.06.2025,Uncategorized
8,REALITY,345.07,01.06.2025,Uncategorized
10,Dhadkan,6133.0,01.06.2025,Uncategorized
15,"Tricking, Parkour, & Tumbling Club",500.0,01.13.2025,Uncategorized
16,"Tricking, Parkour, & Tumbling Club",750.0,01.13.2025,Uncategorized
17,Gaucho Gaming,35.0,01.13.2025,Uncategorized
22,Girl Up,800.0,01.13.2025,Uncategorized
23,Girl Up,60.0,01.13.2025,Uncategorized


In [6]:
# Manual overrides for names the keyword pass misses or mislabels.
# Category -> exact club/org names (isin performs whole-string, case-sensitive
# matching, so each entry must equal the CSV value exactly).
manual_categories = {
    'Community Service': [
        'Girl Up',
        'YouthBridge Housing CA',
        'Crescendo for a Cause',
        "Your Children's Trees",
        'Kids in Nutrition',
    ],
    'Arts/Performance': [
        "UCSBreakin'",
        'REALITY',
        'Dhadkan',
        'Ravaani',
        'Taara',
        'Laughology',
        'Raices de Mi Tierra',
        'Agni',
    ],
    'Sports/Fitness': [
        'Tricking, Parkour, & Tumbling Club',
        'Triathlon',
        'Strikers Club',
    ],
    'Recreational/Social': [
        'Gaucho Gaming',
        'Chess Society',
        'Fashion Club',
        'Cube Club',
    ],
    'Professional - Civic': ['Ethics Bowl'],
    'Cultural': [
        'Lebanese Social Club',
        'Comunidad Latinx Graduacion',
        'Persian Student Group',
        'Indus',
        # NOTE(review): this name looks truncated -- confirm it matches the CSV exactly.
        'El Congreso de',
        'Kapatirang Pilipino',
        'Chinese Students and Scholars Association',
    ],
    'Research': ['Move@UCSB'],
    'Academic': [
        'SB Creative Lab',
        'TEDx',
        'Quizbowl',
        'Student Veterans Organization',
    ],
    'Religious': ['Kristos Campus Missions'],
    'Publication': ['Gaucho Marks'],
    'Associated Students': ['Mold Kits IVTU'],
    'Professional - Finance/Business': ['Santa Barbara Case Club'],
    # Was 'UCSB Program' (singular), which created a second label alongside
    # the 'UCSB Programs' category assigned by the keyword table.
    'UCSB Programs': ['Residential & Community Living'],
    'Professional - STEM': ['Los Ingenieros'],
}

# Apply in table order, overwriting whatever the keyword pass assigned.
for category, org_names in manual_categories.items():
    df.loc[df['Club/Org Name'].isin(org_names), 'Category'] = category

In [7]:
# Persist the labelled records without writing the DataFrame index.
df.to_csv(path_or_buf='classified-org-funding-data.csv', index=False)

df

Unnamed: 0,Club/Org Name,Funding,Date,Category
0,Girl Up,900.0,01.06.2025,Community Service
1,Girl Up,550.0,01.06.2025,Community Service
2,Model United Nations,2255.0,01.06.2025,Professional - Civic
3,Model United Nations,5444.24,01.06.2025,Professional - Civic
4,Muslim Student Association,265.0,01.06.2025,Religious
5,UCSBreakin',1000.0,01.06.2025,Arts/Performance
6,Association for Computing Machinery,150.0,01.06.2025,Professional - STEM
7,Men's Rugby,16071.0,01.06.2025,Sports/Fitness
8,REALITY,345.07,01.06.2025,Arts/Performance
9,Gaucho Pep Band,9474.0,01.06.2025,Arts/Performance
