# My Script for Getting Summary Statistics for Each State

In [6]:
import pandas as pd
import numpy as np

In [7]:
def get_stats(df, name=''):
    """Count the institutions in ``df`` by category and print a summary.

    Args:
        df: DataFrame with one row per institution. It must contain the
            columns ``basic2021``, ``hbcu``, ``hsi``, ``tribal``, ``msi``,
            ``womens`` and ``control``.
        name: Label inserted into every printed line (for example a state
            name). Defaults to an empty string.

    Returns:
        dict mapping a category label to a plain ``int`` count, in a fixed
        order starting with the total number of rows.
    """
    def count(mask):
        # Summing a boolean mask counts the matching rows without
        # materialising a filtered copy of the frame for every category.
        return int(mask.sum())

    results = {
        'No. universities': len(df),
        # basic2021 codes 15 and 16 are the ones reported as R1/R2 here
        'No. R1/R2': count(df['basic2021'].isin([15, 16])),
        'No. HBCU': count(df['hbcu'] == 1),
        'No. HSI': count(df['hsi'] == 1),
        'No. Tribal': count(df['tribal'] == 1),
        'No. MSI': count(df['msi'] == 1),
        'No. Womens': count(df['womens'] == 1),
        # control == 1 is reported as public, control == 2 as private not-for-profit
        'No. Public': count(df['control'] == 1),
        'No. Private NFP': count(df['control'] == 2),
    }
    # Look each value up by its key rather than by position in the dict, so
    # adding or reordering entries above can never mislabel a printed line.
    print(f'''
    # universities in {name}: {results["No. universities"]}
    # R1/R2 universities in {name}: {results["No. R1/R2"]}
    # hbcu schools in {name}: {results["No. HBCU"]}
    # hsi schools in {name}: {results["No. HSI"]}
    # tribal schools in {name}: {results["No. Tribal"]}
    # msi schools in {name}: {results["No. MSI"]}
    # womens schools in {name}: {results["No. Womens"]}
    # public schools in {name}: {results["No. Public"]}
    # private (not for profit) schools in {name}: {results["No. Private NFP"]}

      ''')
    return results

## Massachusetts

In [10]:
## Massachusetts is neighbored/surrounded by a few relevant states
state_of_interest = 'MA'
# The comparison region includes MA itself; each state is listed exactly once
nearby_states = ['MA', 'NY', 'CT', 'NH', 'RI', 'ME', 'VT']

# Load the Carnegie table, keep only the columns of interest,
# then restrict the rows to the states in the comparison region
nearby_to_MA_universities = pd.read_csv('../data/carnegie_with_location.csv')[[
    'name', 'basic2021', 'control', 'hbcu', 'hsi', 'tribal', 'msi',
    'landgrnt', 'womens', 'stabbr', 'latitude', 'longitude',
]]
nearby_to_MA_universities = nearby_to_MA_universities[nearby_to_MA_universities['stabbr'].isin(nearby_states)]

# Also store just those in MA
MA_universities = nearby_to_MA_universities[nearby_to_MA_universities['stabbr'] == state_of_interest]

# Get stats; pass a label so each printed line says which state(s) it describes
stats_MA = get_stats(MA_universities, state_of_interest)
stats_nearby = get_stats(nearby_to_MA_universities, ', '.join(nearby_states))

# Combine into one table: one column per group, one row per category
MA_university_summary = pd.DataFrame.from_dict({
    'Massachusetts': stats_MA,
    'Neighboring Massachusetts': stats_nearby
})
MA_university_summary.index.name = 'Categories'

MA_university_summary.to_csv('MA_summary_stats.csv')

# Bare expression so the notebook renders the table as the cell output
MA_university_summary


    # universities in : 106
    # R1/R2 universities in : 13
    # hbcu schools in : 0
    # hsi schools in : 2
    # tribal schools in : 0
    # msi schools in : 2
    # womens schools in : 5
    # public schools in : 30
    # private (not for profit) schools in : 74

      

    # universities in : 524
    # R1/R2 universities in : 41
    # hbcu schools in : 0
    # hsi schools in : 24
    # tribal schools in : 0
    # msi schools in : 24
    # womens schools in : 6
    # public schools in : 167
    # private (not for profit) schools in : 323

      


Unnamed: 0_level_0,Massachusetts,Neighboring Massachusetts
Categories,Unnamed: 1_level_1,Unnamed: 2_level_1
No. universities,106,524
No. R1/R2,13,41
No. HBCU,0,0
No. HSI,2,24
No. Tribal,0,0
No. MSI,2,24
No. Womens,5,6
No. Public,30,167
No. Private NFP,74,323


## Wisconsin

In [8]:
## Wisconsin is neighbored/surrounded by a few relevant states
state_of_interest = 'WI'
# The comparison region includes WI itself
nearby_states = ['WI', 'MI', 'MN', 'IA', 'IL']

# Load the Carnegie table, keep only the columns of interest,
# then restrict the rows to the states in the comparison region
nearby_to_WI_universities = pd.read_csv('../data/carnegie_with_location.csv')[[
    'name', 'basic2021', 'control', 'hbcu', 'hsi', 'tribal', 'msi',
    'landgrnt', 'womens', 'stabbr', 'latitude', 'longitude',
]]
nearby_to_WI_universities = nearby_to_WI_universities[nearby_to_WI_universities['stabbr'].isin(nearby_states)]

# Also store just those in WI
WI_universities = nearby_to_WI_universities[nearby_to_WI_universities['stabbr'] == state_of_interest]

# Get stats; pass a label so each printed line says which state(s) it describes
stats_WI = get_stats(WI_universities, state_of_interest)
stats_nearby = get_stats(nearby_to_WI_universities, ', '.join(nearby_states))

# Combine into one table: one column per group, one row per category
WI_university_summary = pd.DataFrame.from_dict({
    'Wisconsin': stats_WI,
    'Neighboring Wisconsin': stats_nearby
})
WI_university_summary.index.name = 'Categories'

WI_university_summary.to_csv('WI_summary_stats.csv')

# Bare expression so the notebook renders the table as the cell output
WI_university_summary


    # universities in : 67
    # R1/R2 universities in : 3
    # hbcu schools in : 0
    # hsi schools in : 0
    # tribal schools in : 2
    # msi schools in : 2
    # womens schools in : 2
    # public schools in : 33
    # private (not for profit) schools in : 30

      

    # universities in : 447
    # R1/R2 universities in : 24
    # hbcu schools in : 0
    # hsi schools in : 11
    # tribal schools in : 9
    # msi schools in : 20
    # womens schools in : 3
    # public schools in : 203
    # private (not for profit) schools in : 214

      


Unnamed: 0_level_0,Wisconsin,Neighboring Wisconsin
Categories,Unnamed: 1_level_1,Unnamed: 2_level_1
No. universities,67,447
No. R1/R2,3,24
No. HBCU,0,0
No. HSI,0,11
No. Tribal,2,9
No. MSI,2,20
No. Womens,2,3
No. Public,33,203
No. Private NFP,30,214


## Georgia

In [9]:
## Georgia is neighbored/surrounded by a few relevant states
state_of_interest = 'GA'
# The comparison region includes GA itself
nearby_states = ['GA', 'NC', 'SC', 'FL', 'AL', 'TN']

# Load the Carnegie table, keep only the columns of interest,
# then restrict the rows to the states in the comparison region
nearby_to_GA_universities = pd.read_csv('../data/carnegie_with_location.csv')[[
    'name', 'basic2021', 'control', 'hbcu', 'hsi', 'tribal', 'msi',
    'landgrnt', 'womens', 'stabbr', 'latitude', 'longitude',
]]
nearby_to_GA_universities = nearby_to_GA_universities[nearby_to_GA_universities['stabbr'].isin(nearby_states)]

# Also store just those in GA
GA_universities = nearby_to_GA_universities[nearby_to_GA_universities['stabbr'] == state_of_interest]

# Get stats; pass a label so each printed line says which state(s) it describes
stats_GA = get_stats(GA_universities, state_of_interest)
stats_nearby = get_stats(nearby_to_GA_universities, ', '.join(nearby_states))

# Combine into one table: one column per group, one row per category
GA_university_summary = pd.DataFrame.from_dict({
    'Georgia': stats_GA,
    'Neighboring Georgia': stats_nearby
})
GA_university_summary.index.name = 'Categories'

GA_university_summary.to_csv('GA_summary_stats.csv')

# Bare expression so the notebook renders the table as the cell output
GA_university_summary


    # universities in : 107
    # R1/R2 universities in : 9
    # hbcu schools in : 9
    # hsi schools in : 0
    # tribal schools in : 0
    # msi schools in : 9
    # womens schools in : 4
    # public schools in : 50
    # private (not for profit) schools in : 36

      

    # universities in : 611
    # R1/R2 universities in : 43
    # hbcu schools in : 50
    # hsi schools in : 18
    # tribal schools in : 0
    # msi schools in : 68
    # womens schools in : 9
    # public schools in : 261
    # private (not for profit) schools in : 231

      


Unnamed: 0_level_0,Georgia,Neighboring Georgia
Categories,Unnamed: 1_level_1,Unnamed: 2_level_1
No. universities,107,611
No. R1/R2,9,43
No. HBCU,9,50
No. HSI,0,18
No. Tribal,0,0
No. MSI,9,68
No. Womens,4,9
No. Public,50,261
No. Private NFP,36,231
