In [61]:
import numpy as np
import pandas as pd
import re

from fuzzywuzzy import process
# Show up to 200 characters per cell so long organization names are not cut off.
pd.set_option('display.max_colwidth', 200)

# Load 2019-2020 final allocations

In [16]:
# Load the allocation table and keep only the rows whose Year is 2019.
final = pd.read_csv('C:\\Users\\scher\\school\\senior-thesis\\data\\rso_data.txt', index_col=0)
# drop=True discards the old row labels outright.  The previous
# reset_index().drop(columns='index') round trip only worked while the index
# was unnamed and no column called 'index' existed.
final = final[final['Year'] == 2019].reset_index(drop=True)
final

Unnamed: 0,Year,Organization,Type,Designation,Standing,Allocation
0,2019,The B-Side,PUB,Publications RSO,7,350.0
1,2019,BARE Magazine,PUB,Publications RSO,12,4077.0
2,2019,Berkeley Economic Review,PUB,Academic RSO,2,700.0
3,2019,Berkeley Fiction Review,PUB,Publications RSO,36,2000.0
4,2019,Berkeley Poetry Review,PUB,Publications RSO,25,1520.0
...,...,...,...,...,...,...
521,2019,Volunteers for Medical Outreach,SISG,Service RSO,6,2200.0
522,2019,Wonderworks,SISG,Service RSO,9,0.0
523,2019,You Mean More,SISG,Health & Wellness RSO,8,1500.0
524,2019,Youth Empowerment Program,SISG,Service RSO,6,1850.0


# Load and clean 2019-2020 initial allocations

In [17]:
# Read the tab-separated initial-allocation export, keeping four of its
# columns (positions 1, 3, 5, 7) under explicit names.
initial = pd.read_csv('C:\\Users\\scher\\school\\senior-thesis\\data\\reformatted\\2019-2020_initial.txt',
                      delimiter='\t',
                      usecols=[1,3,5,7],
                      names=['Organization', 'Type', 'Standing', 'Allocation'])
# Keep only the PUB / SAG / SISG rows.  .copy() makes the filtered frame
# independent of the raw one, so the column assignments that follow modify a
# real frame rather than a slice (avoids SettingWithCopyWarning).
initial = initial[initial['Type'].isin(['PUB','SAG','SISG'])].copy()
initial['Standing'] = initial['Standing'].apply(int)

def clean_alloc(alloc):
    """Convert a raw allocation cell into a numeric dollar amount.

    Parameters
    ----------
    alloc : str or number
        Either a sponsorship-only marker, a dollar string such as
        ``'$4,077.00'``, or a value that is already numeric (including NaN).

    Returns
    -------
    float
        ``0.0`` for sponsorship-only entries, otherwise the parsed amount.
        Numeric input is passed through ``float`` unchanged, so NaN stays NaN.

    Raises
    ------
    ValueError
        If a string is neither a sponsorship marker nor a parseable number.
    """
    # Cells that are already numeric (e.g. NaN for a blank cell) have no
    # string methods; hand them straight to float().
    if not isinstance(alloc, str):
        return float(alloc)

    text = alloc.strip()

    # Compare case-insensitively and ignore surrounding whitespace.
    # 'sponsorsphip' is a misspelling the original check also accepted.
    if text.lower() in ('sponsorship only', 'sponsorsphip only'):
        return 0.0

    # Strip the currency symbol and thousands separators before parsing.
    return float(text.replace('$', '').replace(',', ''))

# Turn every raw allocation cell into a number via clean_alloc.
initial['Allocation'] = initial['Allocation'].map(clean_alloc)

initial

Unnamed: 0,Organization,Type,Standing,Allocation
18,"B-Side, The",PUB,7,350.0
19,BARE Magazine,PUB,12,4077.0
20,Berkeley Economic Review,PUB,2,700.0
21,Berkeley Fiction Review,PUB,36,2000.0
22,Berkeley Poetry Review,PUB,25,1520.0
...,...,...,...,...
490,Volunteers Around the World at Berkeley,SISG,6,200.0
491,Volunteers for Medical Outreach,SISG,6,2200.0
492,You Mean More,SISG,8,1500.0
493,Youth Empowerment Program,SISG,6,1850.0


In [41]:
def org_matcher(initial_name, final_df):
    """Map an organization name onto its counterpart in ``final_df``.

    The best fuzzy match for ``initial_name`` among
    ``final_df['Organization']`` is looked up with
    ``process.extractOne``.  A match scoring exactly 100 is accepted
    automatically; any other match is printed and must be confirmed
    interactively.

    Parameters
    ----------
    initial_name : str
        Organization name to look up.
    final_df : pandas.DataFrame
        Frame whose ``'Organization'`` column holds the candidate names.

    Returns
    -------
    str or float
        The matched name, or ``np.nan`` when no candidate is available or
        the user declines the suggested match.
    """
    best_match = process.extractOne(initial_name, final_df['Organization'])

    # extractOne yields None when it has nothing to compare against;
    # treat that as "no match" instead of failing on the subscripts below.
    if best_match is None:
        return np.nan

    # Index instead of unpacking: the result tuple may carry more than
    # (name, score) depending on the kind of choices passed in.
    best_match_name = best_match[0]
    best_match_score = best_match[1]

    if best_match_score == 100:
        return best_match_name

    print(f'The initial name is: {initial_name}. \n The closest match is: {best_match_name}.')
    auth = input('authorize match?')

    # Accept 'y' / 'yes' regardless of case or stray whitespace so a slip
    # of the shift key does not silently discard a valid match.
    if auth.strip().lower() in ('y', 'yes'):
        return best_match_name
    return np.nan

In [47]:
# Interactive step: every name without a perfect-score match prompts for confirmation.
initial['matched_names'] = initial['Organization'].apply(org_matcher, args=(final,))

The initial name is: B-Side, The. 
 The closest match is: The B-Side.
authorize match?y
The initial name is: Berkeley Political Review, The. 
 The closest match is: The Berkeley Political Review.
authorize match?y
The initial name is: Blue & Gold Yearbook. 
 The closest match is: Blue and Gold Yearbook.
authorize match?y
The initial name is: Cal Literature and Arts Magazine. 
 The closest match is: Cal Literature & Arts Magazine.
authorize match?y
The initial name is: Intercollegiate Finance Journal at UC Berkeley. 
 The closest match is: Intercollegiate Finance Journal at Berkeley.
authorize match?y
The initial name is: Smart Ass, The. 
 The closest match is: The Smart Ass.
authorize match?y
The initial name is: threads (formerly known as Al-Bayan). 
 The closest match is: Threads.
authorize match?y
The initial name is: American Institute of Architecture Students -UC Berkeley Chapter. 
 The closest match is: American Institute of Architecture Students.
authorize match?y
The initial na

The initial name is: American Society for Biochemistry and Molecular Biology at Berkeley. 
 The closest match is: American Society for Biochemistry and Molecular Biology.
authorize match?n
The initial name is: Bhagat Puran Singh Health Initiative @ Berkeley. 
 The closest match is: Bhagat Puran Singh Health Initiative at Berkeley.
authorize match?y
The initial name is: Cal Berkeley Habitat for Humanity. 
 The closest match is: Cal Habitat for Humanity.
authorize match?y
The initial name is: California Health Professional Student Alliance. 
 The closest match is: California Health Professional Student Alliance at UC Berkeley.
authorize match?y
The initial name is: Delta Sigma Theta Sorority Incorporated-Kappa Chapter. 
 The closest match is: Alpha Kappa Psi.
authorize match?n
The initial name is: Destress With Dogs. 
 The closest match is: De-stress with Dogs.
authorize match?y
The initial name is: Greening the Greeks. 
 The closest match is: The B-Side.
authorize match?n
The initial na

In [56]:
# Rows that still have no matched name after the interactive pass.
initial.loc[initial['matched_names'].isnull()]

Unnamed: 0,Organization,Type,Standing,Allocation,matched_names
78,Autonomous Underwater Vehicles Team at Berkeley,SAG,3,0.0,
89,Berkeley Energy and Resources Collaborative Un...,SAG,1,200.0,
94,Berkeley Opinion,SAG,6,1250.0,
141,Coalition to Defend Affirmative Action By Any ...,SAG,21,5000.0,
161,Delta Xi Phi Multicultural Sorority Inc.,SAG,3,500.0,
187,Gamma Rho Lambda,SAG,3,500.0,
188,"Gamma Zeta Alpha Fraternity, Inc",SAG,11,685.0,
189,Gates Millennium Scholars Association,SAG,13,1000.0,
193,Golden Women,SAG,3,500.0,
225,Latinx Emerging in English,SAG,3,500.0,


In [125]:
# Drop RSOs that are part of the Panhellenic Council.
# initial = initial.drop([161,187,188,301,302,372,373,410,424,440,441,442,453,460,464])

In [126]:
# Hand-assigned matches, keyed by row label in `initial`.
manual_matches = {
    78: 'Underwater Robotics at Berkeley',
    89: 'Berkeley Energy and Resources Collaborative (Graduate group)',
    94: 'BERKOP',
    141: 'BAMN - Coalition to Defend Affirmative Action, Integration, and Immigrant Rights and Fight for Equality By Any Means Necessary',
    189: 'Gates Millennium Student Association',
    193: 'The Golden',
    225: 'Students of Color Emerging in English',
    314: 'Student Association for Applied Statistics',
    485: 'V-Day at Berkeley',
}
for row_label, final_name in manual_matches.items():
    initial.at[row_label, 'matched_names'] = final_name

In [None]:
# Replace the original names with the matched ones under the same column label.
initial = (
    initial
    .drop(columns='Organization')
    .rename(columns={'matched_names': 'Organization'})
)

In [154]:
# Reload `initial` from disk, replacing whatever is currently in memory.
# NOTE(review): this file is written by the later to_csv cell, so on a fresh
# kernel this read presumably relies on a copy saved by a previous run - confirm.
initial_checkpoint = 'C:\\Users\\scher\\school\\senior-thesis\\data\\initial_allocs_2019'
initial = pd.read_csv(initial_checkpoint, index_col=0)

In [None]:
# Attach each organization's Designation from `final`.
# A Designation column already present on `initial` (the frame saved by the
# to_csv cell has one) is dropped first.  Otherwise the merge would produce
# Designation_x / Designation_y and the column selection would raise KeyError.
# Default inner join: rows whose Organization has no counterpart in `final`
# are dropped.
with_designation = (
    initial
    .drop(columns='Designation', errors='ignore')
    .merge(final[['Organization', 'Designation']], on='Organization')
)
initial = with_designation[['Organization', 'Type', 'Designation', 'Standing', 'Allocation']]

In [167]:
# Persist the cleaned initial allocations, then display them.
initial_checkpoint = 'C:\\Users\\scher\\school\\senior-thesis\\data\\initial_allocs_2019'
initial.to_csv(initial_checkpoint)

initial

Unnamed: 0,Organization,Type,Designation,Standing,Allocation
0,The B-Side,PUB,Publications RSO,7,350.0
1,BARE Magazine,PUB,Publications RSO,12,4077.0
2,Berkeley Economic Review,PUB,Academic RSO,2,700.0
3,Berkeley Fiction Review,PUB,Publications RSO,36,2000.0
4,Berkeley Poetry Review,PUB,Publications RSO,25,1520.0
...,...,...,...,...,...
457,Volunteers Around the World at Berkeley,SISG,Service RSO,6,200.0
458,Volunteers for Medical Outreach,SISG,Service RSO,6,2200.0
459,You Mean More,SISG,Health & Wellness RSO,8,1500.0
460,Youth Empowerment Program,SISG,Service RSO,6,1850.0


# Load and clean the list of organizations that requested appeals

In [20]:
# The appeals file is read without a header row; its single column is named here.
appeals_source = 'C:\\Users\\scher\\school\\senior-thesis\\data\\appeals_2019.csv'
appeals = pd.read_csv(appeals_source, names=['Organization'])
appeals

Unnamed: 0,Organization
0,"Alpha Phi Alpha Fraternity Inc., Alpha Epsilon..."
1,Toppa@Berkeley
2,Wave Makers of Berkeley
3,Swim Club at Berkeley
4,Anti-Trafficking Coalition at Berkeley
5,Global Medical Missions Alliance Berkeley
6,Cal Rotaract
7,Foresight Pre-Optometry Club
8,Cal Berkeley Democrats
9,Financial Literacy and Economic Justice Confer...


In [169]:
# Interactive step: match each appealing organization to a name in `final`.
appeals['matched_names'] = appeals['Organization'].apply(org_matcher, args=(final,))

The initial name is: Alpha Phi Alpha Fraternity Inc., Alpha Epsilon Chapter. 
 The closest match is: Alpha Epsilon Zeta Fraternity, Inc..
authorize match?
The initial name is: Toppa@Berkeley. 
 The closest match is: Toppa at Berkeley.
authorize match?y
The initial name is: Global Medical Missions Alliance Berkeley. 
 The closest match is: Global Medical Missions Alliance at Berkeley.
authorize match?y
The initial name is: Bears Closet. 
 The closest match is: Bears for Elder Welfare.
authorize match?
The initial name is: People's Test Preparation Services. 
 The closest match is: People's Test Preparation Service.
authorize match?y
The initial name is: Elections. 
 The closest match is: Berkeley Consulting.
authorize match?
The initial name is: Student Advocate's Office. 
 The closest match is: Clio's Scroll.
authorize match?
The initial name is: CalTV. 
 The closest match is: T-Cal.
authorize match?
The initial name is: Koinonia. 
 The closest match is: Koinonia Campus Fellowship.
aut

In [170]:
# Display the appeals table together with the matched names.
appeals

Unnamed: 0,Organization,matched_names
0,"Alpha Phi Alpha Fraternity Inc., Alpha Epsilon Chapter",
1,Toppa@Berkeley,Toppa at Berkeley
2,Wave Makers of Berkeley,Wave Makers of Berkeley
3,Swim Club at Berkeley,Swim Club at Berkeley
4,Anti-Trafficking Coalition at Berkeley,Anti-Trafficking Coalition at Berkeley
5,Global Medical Missions Alliance Berkeley,Global Medical Missions Alliance at Berkeley
6,Cal Rotaract,Cal Rotaract
7,Foresight Pre-Optometry Club,Foresight Pre-Optometry Club
8,Cal Berkeley Democrats,Cal Berkeley Democrats
9,Financial Literacy and Economic Justice Conference,Financial Literacy and Economic Justice Conference


In [171]:
# Appeals that could not be matched to any organization in `final`.
appeals.loc[appeals['matched_names'].isnull()]

Unnamed: 0,Organization,matched_names
0,"Alpha Phi Alpha Fraternity Inc., Alpha Epsilon Chapter",
12,Bears Closet,
15,Elections,
19,Student Advocate's Office,
26,CalTV,
34,TBD Comedy,
43,Argentine Tango Club of Berkeley,
47,Community Projects,


In [179]:
# Drop the appeals that have no matched name.  Selecting them by their missing
# value rather than by hard-coded row labels removes the same rows as the
# unmatched list shown above.  It stays correct if the matching is redone,
# and re-running the cell is a no-op instead of a KeyError.
appeals = appeals.dropna(subset=['matched_names'])

In [180]:
# Persist the matched appeals list.
appeals_output = 'C:\\Users\\scher\\school\\senior-thesis\\data\\rso_appeals_2019.csv'
appeals.to_csv(appeals_output)

# Compare initial and final allocations

In [188]:
# Put each organization's initial and final allocation side by side.
# Both frames have an 'Allocation' column, so the merge suffixes them _x / _y.
allocation_labels = {'Allocation_x': 'Initial Allocation', 'Allocation_y': 'Final Allocation'}
final_allocs = final[['Organization', 'Allocation']]
df = initial.merge(final_allocs, on='Organization').rename(columns=allocation_labels)

In [195]:
# Keep only the rows in which every column has a value.
df = df[df.notna().all(axis=1)]

In [199]:
# Total both allocation columns over the rows currently in `df`.
total_initial, total_final = (
    sum(df[column]) for column in ('Initial Allocation', 'Final Allocation')
)

print(f'The total initial allocation for RSOs was ${total_initial}, and ${total_final} was the final total.')
print(f'There was a ${total_final - total_initial} difference between the two.')

The total initial allocation for RSOs was $746935.0, and $811806.0 was the final total.
There was a $64871.0 difference between the two.


In [189]:
# Organizations whose final allocation differs from the initial one.
df.loc[df['Initial Allocation'].ne(df['Final Allocation'])]

Unnamed: 0,Organization,Type,Designation,Standing,Initial Allocation,Final Allocation
37,Alpha Phi Omega,SAG,Service RSO,34,2500.0,4000.0
44,Arab Student Union,SAG,Cultural & Identity/International RSO,8,568.0,1300.0
52,"Asha for Education, Berkeley",SAG,Service RSO,15,195.0,1420.0
99,Black Student Union,SAG,Cultural & Identity/Cultural & Ethnic RSO,10,500.0,25500.0
145,Disney Club at Berkeley,SAG,Recreational RSO,2,70.0,400.0
163,Foresight Pre-Optometry Club,SAG,Academic RSO,28,944.0,1175.0
178,Indian Students Association,SAG,Cultural & Identity/International RSO,16,2781.0,10000.0
250,"Pilipinx Association of Scientists, Architects, and Engineers",SAG,Professional RSO,31,2015.0,2100.0
318,UC Berkeley Model United Nations,SAG,Academic RSO,27,5500.0,9200.0
336,Wave Makers of Berkeley,SAG,Academic RSO,2,400.0,500.0


# 2018-2019 allocations