# VADIR Data Cleaning Part 2.
Source: CSV file generated by VADIR_data_cleanup
        
#### TO DOs (carried over from last file):
* ~~clean up uppercase/lowercase issues (school name and county)~~
* ~~fix column name duplicates and typos (alcohol and drug possession, sex offenses)~~
* ~~check that schools are consistently assigned the same district name~~
* deal with same school having different names (mostly School @ end?)
* figure out a way to handle the data that doesn't have county info (search for school's subsequent year data?)
* get lattitude and longitude (and addresses???)
* ~~re order columns in some meaningful way~~
* start computing tallies of incidents with and without weapons
* check datatypes for columns to make sure they make sense.
* ~~whats with the 5 missing values?~~


In [1]:
# Initial Imports
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from matplotlib import pyplot as plt
% matplotlib inline

In [2]:
# read in saved dataframe
vadir_df = pd.read_csv("VADIR_incidents_2006-14.csv", dtype = object)

### Reordering Columns 

In [3]:
# Demographic Categories and their initial indices:
# County[12], District[15], School Name[46], BEDS code[8], School Year[48], Enrollment [18]
# Grade Organization[22], Need/Resource Category[33], School Type[47]
cols = vadir_df.columns.tolist()
new_order = [12, 15, 46, 48, 18, 8, 22, 33, 47] + list(range(1,8)) + [9, 19, 10, 11, 13, 14, 16, 17, 20, 21]
new_order += list(range(23, 33)) + list(range(34, 46)) + [49, 50, 51, 52]
cols = [cols[idx] for idx in new_order]
vadir_df = vadir_df[cols]

### Merging Duplicate Columns

In [4]:
# rename 'False Alarm' to "Bomb Threat False Alarm"
vadir_df.rename(columns={'False Alarm':"Bomb Threat False Alarm"}, inplace=True)

In [5]:
# Check for and merge/delete duplicate column names
#vadir_df.columns.tolist()

In [6]:
# Merge 'Alcohol Possesion' and 'Use Possession or Sale of Alcohol' into'Alcohol Possession' column
print("Dataframe Length: {}".format(len(vadir_df)), 
       "Missing Values for this column: {}\n".format((vadir_df['Alcohol Possession'].isnull()).sum()),
      "# of Values in first column to merge:{}\n".format((vadir_df['Alcohol Possesion'].notnull()).sum()),
      "# of Values in second:{}\n".format((vadir_df['Use Possession or Sale of Alcohol'].notnull()).sum()))
     
vadir_df['Alcohol Possession'] = vadir_df['Alcohol Possession'].combine_first(vadir_df['Alcohol Possesion'])
vadir_df['Alcohol Possession'] = vadir_df['Alcohol Possession'].combine_first(vadir_df['Use Possession or Sale of Alcohol'])
vadir_df.drop(['Alcohol Possesion', 'Use Possession or Sale of Alcohol'], axis=1, inplace=True)

print("...'Alcohol Possesion' and 'Use Possession or Sale of Alcohol' merged into'Alcohol Possession' column",
      "\n... which now has {} remaining missing values.".format((vadir_df['Alcohol Possession'].isnull()).sum()),
      " Former columns dropped.")

Dataframe Length: 14734 Missing Values for this column: 2956
 # of Values in first column to merge:1498
 # of Values in second:1453

...'Alcohol Possesion' and 'Use Possession or Sale of Alcohol' merged into'Alcohol Possession' column 
... which now has 5 remaining missing values.  Former columns dropped.


In [7]:
# Merge 'Drug Possesion' and 'Use Possession or Sale of Drugs' into'Drug Possession' column
print("Dataframe Length: {}".format(len(vadir_df)), 
       "Missing Values for this column: {}\n".format((vadir_df['Drug Possession'].isnull()).sum()),
      "# of Values in first column to merge:{}\n".format((vadir_df['Drug Possesion'].notnull()).sum()),
      "# of Values in second:{}\n".format((vadir_df['Use Possession or Sale of Drugs'].notnull()).sum()))
     
vadir_df['Drug Possession'] = vadir_df['Drug Possession'].combine_first(vadir_df['Drug Possesion'])
vadir_df['Drug Possession'] = vadir_df['Drug Possession'].combine_first(vadir_df['Use Possession or Sale of Drugs'])
vadir_df.drop(['Drug Possesion', 'Use Possession or Sale of Drugs'], axis=1, inplace=True)

print("...'Drug Possesion' and 'Use Possession or Sale of Drugs' merged into'Drug Possession' column",
      "\n... which now has {} remaining missing values.".format((vadir_df['Drug Possession'].isnull()).sum()),
      " Former columns dropped.")

Dataframe Length: 14734 Missing Values for this column: 2956
 # of Values in first column to merge:1498
 # of Values in second:1453

...'Drug Possesion' and 'Use Possession or Sale of Drugs' merged into'Drug Possession' column 
... which now has 5 remaining missing values.  Former columns dropped.


In [8]:
# Merge 'Other Disruptive' into 'Other Disruptive Incidents'
print("Dataframe Length: {}".format(len(vadir_df)), 
       "Missing Values for this column: {}\n".format((vadir_df['Other Disruptive Incidents'].isnull()).sum()),
      "# of Values in column to merge:{}\n".format((vadir_df['Other Disruptive'].notnull()).sum()))
     
vadir_df['Other Disruptive Incidents'] = vadir_df['Other Disruptive Incidents'].combine_first(vadir_df['Other Disruptive'])
vadir_df.drop(['Other Disruptive'], axis=1, inplace=True)

print("...'Other Disruptive' merged into 'Other Disruptive Incidents' column",
      "which now has {} remaining missing values.".format((vadir_df['Other Disruptive Incidents'].isnull()).sum()),
      "\n... Former column dropped.")

Dataframe Length: 14734 Missing Values for this column: 13281
 # of Values in column to merge:13275

...'Other Disruptive' merged into 'Other Disruptive Incidents' column which now has 6 remaining missing values. 
... Former column dropped.


In [9]:
# Merge'Other Sex offenses_nw' into'Other Sex Offenses_nw',
print("Dataframe Length: {}".format(len(vadir_df)), 
       "Missing Values for this column: {}\n".format((vadir_df['Other Sex Offenses_nw'].isnull()).sum()),
      "# of Values in column to merge:{}\n".format((vadir_df['Other Sex offenses_nw'].notnull()).sum()))
     
vadir_df['Other Sex Offenses_nw'] = vadir_df['Other Sex Offenses_nw'].combine_first(vadir_df['Other Sex offenses_nw'])
vadir_df.drop(['Other Sex offenses_nw'], axis=1, inplace=True)

print("...'Other Sex offenses_nw' merged into 'Other Sex Offenses_nw' column",
      "which now has {} remaining missing values.".format((vadir_df['Other Sex Offenses_nw'].isnull()).sum()),
      "\n... Former column dropped.")

Dataframe Length: 14734 Missing Values for this column: 1458
 # of Values in column to merge:1453

...'Other Sex offenses_nw' merged into 'Other Sex Offenses_nw' column which now has 5 remaining missing values. 
... Former column dropped.


In [10]:
# Merge'Other Sex offenses_ww' into'Other Sex Offenses_ww',
print("Dataframe Length: {}".format(len(vadir_df)), 
       "Missing Values for this column: {}\n".format((vadir_df['Other Sex Offenses_ww'].isnull()).sum()),
      "# of Values in column to merge:{}\n".format((vadir_df['Other Sex offenses_ww'].notnull()).sum()))
     
vadir_df['Other Sex Offenses_ww'] = vadir_df['Other Sex Offenses_ww'].combine_first(vadir_df['Other Sex offenses_ww'])
vadir_df.drop(['Other Sex offenses_ww'], axis=1, inplace=True)

print("...'Other Sex offenses_ww' merged into 'Other Sex Offenses_ww' column",
      "which now has {} remaining missing values.".format((vadir_df['Other Sex Offenses_ww'].isnull()).sum()),
      "\n... Former column dropped.")

Dataframe Length: 14734 Missing Values for this column: 1458
 # of Values in column to merge:1453

...'Other Sex offenses_ww' merged into 'Other Sex Offenses_ww' column which now has 5 remaining missing values. 
... Former column dropped.


In [11]:
# Look at Need/Resource Category and School Type to see if they should be merged
print(vadir_df['Need/Resource Category'].value_counts())
print(vadir_df['School Type'].value_counts())

New York City     9448
Charter School     779
Name: Need/Resource Category, dtype: int64
Public            13762
Charter             844
Charter School      124
Name: School Type, dtype: int64


In [12]:
# Yes, they look like they overlap... infact we can maybe just delete the need/resource category
vadir_df.drop(['Need/Resource Category'], axis=1, inplace=True)

In [13]:
# Take a look at the now cleaned column names
vadir_df.columns.tolist()

['County',
 'District',
 'School Name',
 'School Year',
 'Enrollment',
 'BEDS Code',
 'Grade Organization',
 'School Type',
 'Alcohol Possession',
 'Arson',
 'Assault With Serious Physical Injury_nw',
 'Assault With Serious Physical Injury_ww',
 'Assault with Physical Injury_nw',
 'Assault with Physical Injury_ww',
 'Bomb Threat',
 'Bomb Threat False Alarm',
 'Burglary_nw',
 'Burglary_ww',
 'Criminal Mischief_nw',
 'Criminal Mischief_ww',
 'Drug Possession',
 'Forcible Sex Offenses_nw',
 'Forcible Sex Offenses_ww',
 'Homicide_nw',
 'Homicide_ww',
 'Intimidation, Harassment, Menacing, or Bullying_nw',
 'Intimidation, Harassment, Menacing, or Bullying_ww',
 'Kidnapping_nw',
 'Kidnapping_ww',
 'Larceny, or Other Theft_nw',
 'Larceny, or Other Theft_ww',
 'Minor Altercations_nw',
 'Minor Altercations_ww',
 'Other Disruptive Incidents',
 'Other Sex Offenses_nw',
 'Other Sex Offenses_ww',
 'Reckless Endangerment_nw',
 'Reckless Endangerment_ww',
 'Riot_nw',
 'Riot_ww',
 'Robbery_nw',
 'Robbe

### Removing Problem Rows

In [14]:
# School Type NaNs
vadir_df[vadir_df['School Type'].isnull()]
#BEDS Code NaNs
vadir_df[vadir_df['BEDS Code'].isnull()]
# School Name NaNs
vadir_df[vadir_df['School Name'].isnull()]
# Other Disruptive Incidents NaNs
vadir_df[vadir_df['Other Disruptive Incidents'].isnull()]

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Alcohol Possession,Arson,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
10440,,New York City Geographic District # 3,Ms 246 Crossroads,2008,125.0,310300010246.0,,Public,0.0,1.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
11588,,,Peninsula Preparatory Academy Charter School,2008,299.0,342700860869.0,,Charter,,,...,,,,,,,,,,
13277,,,,2007,,,,,,,...,,,,,,,,,,
13278,"In a small number of incidents, a school's Sch...",,,2007,,,,,,,...,,,,,,,,,,
14732,*Enrollment is the PK-12 BEDS I-Day and inclu...,,,2006,,,,,,,...,,,,,,,,,,
14733,**2006-07 data had been revised based on site ...,,,2006,,,,,,,...,,,,,,,,,,


In [15]:
# Qll of the above come up with the same 4 problem rows indexed: 13277, 13278, 14732, 14733 
# They appear to have been mistakenly added when loading 2006 and 07 data.
#... we'll delete them.
vadir_df.drop([13277, 13278, 14732, 14733], axis=0, inplace=True)
#... and reindex
vadir_df.reindex()

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Alcohol Possession,Arson,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
0,Bronx,,Academic Leadership Charter School,2014,376,320700860957,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bronx,,American Dream Charter School,2014,81,320700861062,,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bronx,,Brilla College Preparatory Charter School,2014,247,320700861014,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bronx,,Bronx Academy Of Promise Charter School,2014,581,320900860913,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bronx,,Bronx Charter School For Better Learning,2014,470,321100860855,Elementary,Charter,0,0,...,0,0,9,0,0,0,0,0,0,0
5,Bronx,,Bronx Charter School For Children,2014,424,320700860852,Elementary,Charter,0,0,...,3,0,12,0,0,0,0,0,3,0
6,Bronx,,Bronx Charter School For Excellence,2014,636,321100860859,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Bronx,,Bronx Charter School For The Arts,2014,315,320800860846,Elementary,Charter,1,0,...,0,0,0,0,0,0,0,0,1,1
8,Bronx,,Bronx Community Charter School,2014,352,321000860914,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Bronx,,"Bronx Global Learning Institute For Girls, The...",2014,375,320700860915,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
#Check that they're gone
vadir_df[vadir_df['Other Disruptive Incidents'].isnull()]

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Alcohol Possession,Arson,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
10440,,New York City Geographic District # 3,Ms 246 Crossroads,2008,125,310300010246,,Public,0.0,1.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
11588,,,Peninsula Preparatory Academy Charter School,2008,299,342700860869,,Charter,,,...,,,,,,,,,,


### Fixing Categorical Data Inconsistencies

In [17]:
# Look at County values:
print(vadir_df['County'].value_counts())
print('... and', vadir_df['County'].isnull().sum(), 'Null Values')

BROOKLYN              1911
Kings                 1655
BRONX                 1469
Bronx                 1268
QUEENS                1225
New York              1040
Queens                1035
MANHATTAN              937
MANHATTEN              291
RICHMOND               259
Richmond               219
NYC CENTRAL OFFICE     172
Nyc Central Office     114
NYC Central Office      58
Nassau                   1
Name: County, dtype: int64
... and 3076 Null Values


In [18]:
# Adjust 'County' values to all have the same capitalization
vadir_df['County']= vadir_df['County'].apply(lambda x: x.title() if type(x) == type('s') else x)

# Examine County Values to confirm changes
print(vadir_df['County'].value_counts())

Bronx                 2737
Queens                2260
Brooklyn              1911
Kings                 1655
New York              1040
Manhattan              937
Richmond               478
Nyc Central Office     344
Manhatten              291
Nassau                   1
Name: County, dtype: int64


In [37]:
# Checking District values
print(vadir_df['District'].value_counts())

# ... could use some editing for sussinctness but I'll leave it alone for now until we
# decide how we're going to handle the 'location question'

New York City Geographic District # 2           940
New York City Geographic District #10           738
New York City Geographic District # 9           613
New York City Geographic District #31           588
New York City Geographic District #11           524
New York City Geographic District #27           510
New York City Geographic District # 8           477
New York City Geographic District #17           458
Nyc Special Schools - District 75               456
New York City Geographic District #12           451
New York City Geographic District #24           445
New York City Geographic District # 6           412
New York City Geographic District #19           400
New York City Geographic District #15           400
New York City Geographic District # 3           393
New York City Geographic District #29           389
New York City Geographic District #28           388
New York City Geographic District #13           386
New York City Geographic District #30           386
New York Cit

In [20]:
# Look at School Type
vadir_df['School Type'].value_counts()

Public            13762
Charter             844
Charter School      124
Name: School Type, dtype: int64

In [21]:
# Get indices with School type'Charter School'
indices = vadir_df[vadir_df['School Type'] == 'Charter School'].index.tolist()
vadir_df.loc[indices, 'School Type'] = 'Charter'

# Check counts to confirm change
vadir_df['School Type'].value_counts()

Public     13762
Charter      968
Name: School Type, dtype: int64

In [22]:
# Look at School Names and BEDS Codes
print("# of Distinct School Names:", len(vadir_df['School Name'].unique()))
print("# of Distinct BEDS Codes:", len(vadir_df['BEDS Code'].unique()))

# of Distinct School Names: 4930
# of Distinct BEDS Codes: 1967


In [23]:
# Adjust 'School Name' values to all have the same capitalization
vadir_df['School Name']= vadir_df['School Name'].apply(lambda x: x.title() if type(x) == type('s') else x)

In [24]:
# Look again at School Names and BEDS Codes
print("# of Distinct School Names:", len(vadir_df['School Name'].unique()))
print("# of Distinct BEDS Codes:", len(vadir_df['BEDS Code'].unique()))

# of Distinct School Names: 3135
# of Distinct BEDS Codes: 1967


In [25]:
# Trying to track down school overlaps
PROBLEM_SCHOOLS = {}
for code, df in vadir_df.groupby('BEDS Code'):
    if len(df['School Name'].unique()) > 1:
        PROBLEM_SCHOOLS[code] = len(df['School Name'].unique())

print('TOTAL:', len(PROBLEM_SCHOOLS))
print('FIRST FIVE:')
count = 0
for code, num in PROBLEM_SCHOOLS.items():
    if count > 5:
        break
    else:
        print('... code:', code, '# names:', num)
        count += 1

TOTAL: 953
FIRST FIVE:
... code: 332000010170 # names: 2
... code: 310300010250 # names: 2
... code: 332100010096 # names: 2
... code: 307500013372 # names: 3
... code: 331300010266 # names: 2
... code: 331400010034 # names: 2


In [26]:
# Aha.... this is going to be a pain to fix.
vadir_df[vadir_df['BEDS Code'] == '353100010004']

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Alcohol Possession,Arson,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
1773,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin,2014,804,353100010004,Elementary,Public,0,0,...,0,0,0,0,0,0,0,0,2,0
3569,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin,2013,769,353100010004,Elementary,Public,0,0,...,1,0,1,0,0,0,0,0,0,0
5304,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin,2012,791,353100010004,Elementary,Public,0,0,...,0,0,0,0,0,0,0,0,0,0
6998,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin,2011,796,353100010004,Elementary,Public,0,0,...,1,0,0,0,0,0,0,0,0,0
8675,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin,2010,806,353100010004,Elementary,Public,0,0,...,0,0,0,0,0,0,0,0,1,0
10172,,New York City Geographic District #31,Ps 4 Maurice Wollin,2009,792,353100010004,Elementary,Public,0,0,...,2,0,0,0,0,0,0,0,0,0
11718,,New York City Geographic District #31,Ps 4 Maurice Wollin,2008,792,353100010004,,Public,0,0,...,0,0,0,0,0,0,0,0,0,0
13219,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin School,2007,779,353100010004,,Public,0,1,...,0,0,0,0,0,0,0,0,0,0
14707,Richmond,New York City Geographic District #31,Ps 4 Maurice Wollin School,2006,764,353100010004,,Public,0,0,...,0,0,0,0,0,0,0,0,0,0


### Tally number of incidents in total, with weapons, without weapons.

In [27]:
# get numeric values
COUNT_COLUMNS = vadir_df.columns[8:].tolist()

In [28]:
# convert data types
vadir_df[COUNT_COLUMNS] = vadir_df[COUNT_COLUMNS].apply(lambda x: pd.to_numeric(x))

In [29]:
# compute tallies
vadir_df['Total Incidents'] = vadir_df[COUNT_COLUMNS].sum(axis=1)

WEAPON_COLS = [x for x in COUNT_COLUMNS if x[-3:] == '_ww']
vadir_df['Incidents w/ Weapons'] = vadir_df[WEAPON_COLS].sum(axis=1)

NO_WEAPON_COLS = [x for x in COUNT_COLUMNS if x[-3:] == '_nw']
vadir_df['Incidents w/o Weapons'] = vadir_df[NO_WEAPON_COLS].sum(axis=1)

# reorder columns
ORDER = ['County', 'District', 'School Name', 'School Year', 'Enrollment', 'BEDS Code',
         'Grade Organization','School Type','Total Incidents','Incidents w/ Weapons',
         'Incidents w/ Weapons'] + COUNT_COLUMNS
vadir_df = vadir_df[ORDER]
vadir_df.head()

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Total Incidents,Incidents w/ Weapons,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
0,Bronx,,Academic Leadership Charter School,2014,376,320700860957,Elementary,Charter,2,0,...,0,0,0,0,0,0,0,0,0,0
1,Bronx,,American Dream Charter School,2014,81,320700861062,,Charter,12,8,...,0,0,0,0,0,0,0,0,0,0
2,Bronx,,Brilla College Preparatory Charter School,2014,247,320700861014,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bronx,,Bronx Academy Of Promise Charter School,2014,581,320900860913,Elementary,Charter,5,5,...,0,0,0,0,0,0,0,0,0,0
4,Bronx,,Bronx Charter School For Better Learning,2014,470,321100860855,Elementary,Charter,49,27,...,0,0,9,0,0,0,0,0,0,0


### Save Cleaned Data Frame to File

In [34]:
vadir_df.head()

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Total Incidents,Incidents w/ Weapons,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
0,Bronx,,Academic Leadership Charter School,2014,376,320700860957,Elementary,Charter,2,0,...,0,0,0,0,0,0,0,0,0,0
1,Bronx,,American Dream Charter School,2014,81,320700861062,,Charter,12,8,...,0,0,0,0,0,0,0,0,0,0
2,Bronx,,Brilla College Preparatory Charter School,2014,247,320700861014,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bronx,,Bronx Academy Of Promise Charter School,2014,581,320900860913,Elementary,Charter,5,5,...,0,0,0,0,0,0,0,0,0,0
4,Bronx,,Bronx Charter School For Better Learning,2014,470,321100860855,Elementary,Charter,49,27,...,0,0,9,0,0,0,0,0,0,0


In [35]:
# Save results to a 'clean' file.
vadir_df.to_csv("VADIR_clean.csv", index = False)

In [36]:
# Check that is saved
pd.read_csv("VADIR_clean.csv").head()

Unnamed: 0,County,District,School Name,School Year,Enrollment,BEDS Code,Grade Organization,School Type,Total Incidents,Incidents w/ Weapons,...,Other Sex Offenses_nw,Other Sex Offenses_ww,Reckless Endangerment_nw,Reckless Endangerment_ww,Riot_nw,Riot_ww,Robbery_nw,Robbery_ww,Weapon Possession_nw,Weapon Possession_ww
0,Bronx,,Academic Leadership Charter School,2014,376,320700860957,Elementary,Charter,2,0,...,0,0,0,0,0,0,0,0,0,0
1,Bronx,,American Dream Charter School,2014,81,320700861062,,Charter,12,8,...,0,0,0,0,0,0,0,0,0,0
2,Bronx,,Brilla College Preparatory Charter School,2014,247,320700861014,Elementary,Charter,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bronx,,Bronx Academy Of Promise Charter School,2014,581,320900860913,Elementary,Charter,5,5,...,0,0,0,0,0,0,0,0,0,0
4,Bronx,,Bronx Charter School For Better Learning,2014,470,321100860855,Elementary,Charter,49,27,...,0,0,9,0,0,0,0,0,0,0
