## Define a function

In [1]:
# Dependencies
import openml
import pandas as pd
from IPython.display import display

# Function to visualize datasets form OpenML
def download_openml_dataset(dataset_id):
    # This line will download the dataset from OpenML
    dataset = openml.datasets.get_dataset(dataset_id)

    # Get the predictors and target as a pandas DataFrame and Series, respectively
    X, y, categorical_indicator, attribute_names = dataset.get_data(
        dataset_format='dataframe',
        target=dataset.default_target_attribute
    )

    # Display the first few rows of the DataFrame
    pd.set_option('display.max_columns', 50)  # this will display up to 50 columns
    display(X.head())

    # Print the total number of rows
    print("\nTotal number of rows in X:", X.shape[0])

    # Additional metrics for 'y'
    target_name = dataset.default_target_attribute
    print(f"\nTarget variable ('y') is '{target_name}' with the following metrics:")
    print("Min of 'y':", y.min())
    print("Max of 'y':", y.max())
    print("Average of 'y':", y.mean())

    # Return the predictors and target
    return X, y


## Red wine data, 44972

In [2]:
X, y = download_openml_dataset(44972)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4



Total number of rows in X: 1599

Target variable ('y') is 'quality' with the following metrics:
Min of 'y': 3
Max of 'y': 8
Average of 'y': 5.6360225140712945


## Wine reviews, 42074

In [3]:
X, y = download_openml_dataset(42074)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude



Total number of rows in X: 150930

Target variable ('y') is 'None' with the following metrics:


AttributeError: 'NoneType' object has no attribute 'min'

## Wine reviews, 43600

In [4]:
X, y = download_openml_dataset(43600)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_photo,taster_twitter_handle,title,variety,vintage,winery
0,Portugal,This is a deliciously creamy wine with light w...,Assobio Branco,87,14.0,Douro,,,Roger Voss,https://253qv1sx4ey389p9wtpp9sj0-wpengine.netd...,vossroger,Quinta dos Muras 2016 Assobio Branco White (Do...,Portuguese White,2016,Quinta dos Muras
1,US,"Black plum juice, black pepper, caramel and sm...",,87,25.0,California,Paso Robles,Central Coast,Matt Kettmann,https://253qv1sx4ey389p9wtpp9sj0-wpengine.netd...,mattkettmann,Western Slope 2014 Cabernet Sauvignon (Paso Ro...,Cabernet Sauvignon,2014,Western Slope
2,Georgia,Aromas of green apple and white flowers prepar...,,87,14.0,Lechkhumi,,,Mike DeSimone,https://253qv1sx4ey389p9wtpp9sj0-wpengine.netd...,worldwineguys,Teliani Valley 2015 Tsolikouri (Lechkhumi),Tsolikouri,2015,Teliani Valley
3,Kosovo,"This wine has aromas of black berry, dried red...",,87,13.0,Rahoveci Valley,,,Jeff Jenssen,https://253qv1sx4ey389p9wtpp9sj0-wpengine.netd...,worldwineguys,Stone Castle 2013 Shiraz (Rahoveci Valley),Shiraz,2013,Stone Castle
4,Italy,"A blend of organically cultivated Groppello, M...",San'Emiliano Chiaretto,87,13.0,Lombardy,Valtnesi,,Kerin OKeefe,https://253qv1sx4ey389p9wtpp9sj0-wpengine.netd...,kerinokeefe,Pratello 2016 San'Emiliano Chiaretto Rosato (V...,Rosato,2016,Pratello



Total number of rows in X: 81115

Target variable ('y') is 'None' with the following metrics:


AttributeError: 'NoneType' object has no attribute 'min'

# global wine points, from data.world

In [5]:
df = pd.read_excel('https://query.data.world/s/7dgdguhyvdg7gayzoavk2cz5waec45?dws=00000')
display(df.head())
print("\nTotal number of rows in X:", df.shape[0])

Unnamed: 0,Vintage,Country,County,Designation,Points,Price,Province,Title,Variety,Winery
0,1919-01-01 00:00:00,Spain,Cava,1919 Brut Selecció,88,$13.00,Catalonia,L'Arboc NV 1919 Brut Selecció Sparkling (Cava),Sparkling Blend,L'Arboc
1,1929-01-01 00:00:00,Italy,Vernaccia di San Gimignano,,87,$14.00,Tuscany,Guidi 1929 2015 Vernaccia di San Gimignano,Vernaccia,Guidi 1929
2,1929-01-01 00:00:00,Italy,Sangiovese di Romagna Superiore,Prugneto,84,$15.00,Central Italy,Poderi dal Nespoli 1929 2011 Prugneto (Sangiov...,Sangiovese,Poderi dal Nespoli 1929
3,1934-01-01 00:00:00,Portugal,,Reserva Velho,93,$495.00,Colares,Adega Viuva Gomes 1934 Reserva Velho Red (Cola...,Ramisco,Adega Viuva Gomes
4,1945-01-01 00:00:00,France,Rivesaltes,Legend Vintage,95,$350.00,Languedoc-Roussillon,Gérard Bertrand 1945 Legend Vintage Red (Rives...,Red Blend,Gérard Bertrand



Total number of rows in X: 24997


-------------------------------------------------------------------------------

## Gun violence, School Shootings, https://www.gunviolencearchive.org/

In [7]:
# Read the CSV data
df = pd.read_csv('/Users/a/Desktop/ClassFolder_040123/final_project/school_shootings.csv')

# Display the first few rows of the DataFrame
print("\nTotal number of rows in df:", df.shape[0])
display(df.sample(30))


Total number of rows in df: 2000


Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Victims Injured,# Victims Killed,# Subjects-Suspects Injured,# Subjects-Suspects Killed,# Subjects-Suspects Arrested,Operations
766,2447895,"October 28, 2022",Oklahoma,Yukon,10901 SW 15th St,0,0,0,0,1,
375,2529696,"February 17, 2023",Nevada,Las Vegas,333 S Pavilion Center Dr,0,0,0,0,1,
43,2594470,"May 10, 2023",Michigan,Grand Rapids,863 7th St NW,0,0,0,0,0,
1890,2162602,"October 29, 2021",Virginia,Chesapeake,4410 Airline Blvd,0,0,0,0,0,
1043,2412964,"September 12, 2022",Texas,Corpus Christi,3900 Hamlin Dr,0,0,0,0,0,
1224,2327173,"June 9, 2022",Mississippi,Byhalia,278 MS-309,0,0,0,0,1,
1939,2146097,"October 19, 2021",Indiana,Indianapolis,1200 block of Campus Dr,0,1,0,0,0,
1199,2385453,"August 5, 2022",Indiana,Lafayette,1801 S 18th St,0,0,0,0,1,
1767,2185675,"December 3, 2021",California,Lancaster,3730 E Avenue J,0,0,0,0,0,
1661,2214981,"January 19, 2022",Georgia,Atlanta,551 John Wesley Dobbs Ave NE,0,0,0,0,0,


## Gun violence, Mass Shootings, https://www.gunviolencearchive.org/

In [8]:
# Read the CSV data
df = pd.read_csv('/Users/a/Desktop/ClassFolder_040123/final_project/mass_shooting_all_years.csv')

# Display the first few rows of the DataFrame
print("\nTotal number of rows in df:", df.shape[0])
display(df.sample(30))


Total number of rows in df: 2000


Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Victims Injured,# Victims Killed,# Subjects-Suspects Injured,# Subjects-Suspects Killed,# Subjects-Suspects Arrested,Operations
1726,1800188,"September 19, 2020",New Jersey,Plainfield,523 W 3rd St,4,0,0,0,0,
1695,1815308,"October 5, 2020",District of Columbia,Washington,400 block of Orange St SE,3,1,0,0,0,
293,2459150,"November 13, 2022",Virginia,Charlottesville,130 Culbreth St,2,3,0,0,1,
1401,1993664,"May 2, 2021",Pennsylvania,Philadelphia,5200 block of Burton St,3,1,0,0,0,
1778,1780091,"August 30, 2020",Kentucky,Madisonville,400 block of Elm St,5,0,0,0,0,
1634,1850693,"November 12, 2020",North Carolina,Gastonia,1225 Union Rd,6,0,0,0,2,
1633,1850981,"November 12, 2020",Florida,Tampa,1200 block of E Palifox St,4,2,0,0,0,
354,2436826,"October 13, 2022",North Carolina,Raleigh,5300 block of Sahalee Way,2,5,1,0,1,
244,2490925,"December 30, 2022",Texas,Humble,4800 block of Park Square Ln,3,2,0,0,0,
1433,1979869,"April 15, 2021",Louisiana,Lake Charles,4101 5th Ave,3,1,0,0,1,


## Firearm deaths, USA, data.world

In [9]:
# Read the CSV data
df = pd.read_csv('/Users/a/Desktop/ClassFolder_040123/final_project/firearm_deaths_usafacts.csv')

# Display the first few rows of the DataFrame
display(df.sample(30))
print("\nTotal number of rows in df:", df.shape[0])

Unnamed: 0,Years,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
30,Colorado (People),486,417,483,465,474.0,509.0,430,462.0,399.0,422.0,429.0,498.0,544.0,511.0,505.0,536.0,465.0,449.0,452.0,443.0,510.0,517.0,501.0,553.0,535.0,497.0,505.0,513.0,583.0,555.0,573.0,672.0,619.0,663.0,701.0,812.0,779.0,889.0,846.0,922.0,1064.0
60,Ohio (People),1366,1220,1054,1112,1127.0,1097.0,1061,1074.0,1137.0,1178.0,1284.0,1232.0,1239.0,1175.0,1055.0,1008.0,966.0,969.0,939.0,890.0,1029.0,1069.0,934.0,1036.0,1116.0,1114.0,1105.0,1115.0,991.0,1148.0,1227.0,1263.0,1289.0,1211.0,1397.0,1524.0,1589.0,1555.0,1578.0,1764.0,1911.0
44,Maine (People),111,92,103,109,121.0,86.0,113,132.0,138.0,114.0,123.0,125.0,114.0,119.0,109.0,123.0,94.0,122.0,113.0,112.0,98.0,88.0,82.0,108.0,109.0,105.0,107.0,122.0,122.0,113.0,136.0,130.0,158.0,133.0,144.0,123.0,172.0,159.0,163.0,153.0,178.0
36,Hawaii (People),58,43,45,52,50.0,59.0,66,46.0,59.0,56.0,57.0,55.0,51.0,79.0,75.0,48.0,58.0,40.0,42.0,52.0,47.0,36.0,37.0,41.0,28.0,33.0,36.0,41.0,47.0,45.0,49.0,51.0,38.0,40.0,55.0,66.0,39.0,59.0,62.0,50.0,71.0
27,Arizona (People),519,501,472,522,550.0,616.0,647,674.0,666.0,699.0,696.0,745.0,811.0,902.0,986.0,833.0,853.0,887.0,822.0,796.0,842.0,968.0,849.0,897.0,934.0,982.0,951.0,907.0,856.0,931.0,964.0,946.0,941.0,927.0,970.0,1094.0,1134.0,1147.0,1136.0,1265.0,1365.0
69,Utah (People),166,190,168,176,164.0,190.0,194,169.0,172.0,201.0,214.0,195.0,197.0,224.0,225.0,213.0,209.0,242.0,192.0,193.0,229.0,207.0,230.0,225.0,227.0,224.0,253.0,238.0,260.0,314.0,308.0,324.0,339.0,337.0,367.0,370.0,410.0,397.0,394.0,429.0,450.0
76,Sources:,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
31,Connecticut (People),238,245,227,205,211.0,234.0,235,275.0,278.0,258.0,287.0,271.0,304.0,299.0,255.0,247.0,188.0,190.0,199.0,179.0,191.0,147.0,153.0,173.0,187.0,173.0,149.0,200.0,173.0,209.0,210.0,227.0,161.0,187.0,189.0,172.0,188.0,186.0,190.0,219.0,248.0
9,20-34 (People),14667,13670,12691,12470,12430.0,13402.0,12887,13417.0,13656.0,14781.0,15414.0,15078.0,15801.0,15169.0,13571.0,12219.0,11625.0,10549.0,9874.0,9758.0,10301.0,10446.0,10565.0,10067.0,10500.0,10535.0,10535.0,10297.0,9757.0,10042.0,10250.0,10632.0,10680.0,10427.0,11908.0,12966.0,13125.0,12704.0,12880.0,16270.0,17483.0
25,Alabama (People),824,820,710,750,743.0,812.0,749,801.0,801.0,912.0,928.0,873.0,943.0,945.0,931.0,870.0,848.0,819.0,790.0,766.0,737.0,724.0,765.0,679.0,736.0,780.0,812.0,820.0,830.0,782.0,785.0,831.0,860.0,815.0,958.0,1046.0,1124.0,1064.0,1076.0,1141.0,1315.0



Total number of rows in df: 81


## Gun deaths by county, https://data.world/nkrishnaswami/gun-deaths-by-county

In [10]:
# Read the TSV data
df = pd.read_csv('/Users/a/Desktop/ClassFolder_040123/final_project/Gun Deaths by County, 1999-2020.tsv', sep='\t')

# Display the first few rows of the DataFrame
display(df.sample(30))
print("\nTotal number of rows in df:", df.shape[0])

Unnamed: 0,year,county_name,state_code,county_code,deaths,population,crude_rate,crude_rate_lower_95_confidence_interval,crude_rate_upper_95_confidence_interval,age_adjusted_rate,age_adjusted_rate_lower_95_confidence_interval,age_adjusted_rate_upper_95_confidence_interval
2019,2002,"Hampden County, MA",25,25013,20,458780,4.36,2.66,6.73,4.36,2.66,6.73
10318,2015,"Baldwin County, AL",1,1003,37,203709,18.16,12.79,25.04,17.62,12.27,24.5
3555,2004,"Clallam County, WA",53,53009,12,67411,,9.2,31.1,,7.57,29.03
1981,2002,"Pike County, KY",21,21195,14,67438,,11.35,34.83,,11.75,36.07
12118,2017,"Hancock County, IN",18,18059,13,74985,,9.23,29.65,,8.34,28.19
1869,2002,"Brevard County, FL",12,12009,52,495425,10.5,7.84,13.76,9.34,6.89,12.38
13465,2018,"Victoria County, TX",48,48469,14,92035,,8.32,25.52,,7.97,24.47
9818,2014,"Shawnee County, KS",20,20177,23,178406,12.89,8.17,19.34,13.27,8.32,20.1
13903,2019,"Livingston County, MI",26,26093,13,191995,,3.61,11.58,,3.72,12.57
7911,2011,"Muskogee County, OK",40,40101,10,71003,,6.75,25.9,,6.37,26.44



Total number of rows in df: 15252


## Gun laws, https://www.kaggle.com/code/jonathanbouchet/firearm-regulations-in-the-u-s

In [11]:
# Read the CSV data
df = pd.read_csv('/Users/a/Desktop/ClassFolder_040123/final_project/raw_data.csv')

# Display the first few rows of the DataFrame
display(df.sample(30))
print("\nTotal number of rows in df:", df.shape[0])

Unnamed: 0,state,year,age18longgunpossess,age18longgunsale,age21handgunpossess,age21handgunsale,age21longgunpossess,age21longgunsale,age21longgunsaled,alcoholism,alctreatment,amm18,amm21h,ammbackground,ammlicense,ammpermit,ammrecords,ammrestrict,assault,assaultlist,assaultregister,assaulttransfer,backgroundpurge,cap14,cap16,...,residential,security,showing,stalking,statechecks,statechecksh,strawpurchase,strawpurchaseh,tenroundlimit,theft,threedaylimit,traffickingbackground,traffickingprohibited,traffickingprohibitedh,training,universal,universalh,universalpermit,universalpermith,violent,violenth,violentpartial,waiting,waitingh,lawtotal
1096,Washington,2012,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25
742,Texas,2005,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18
1124,Missouri,2013,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8
311,Idaho,1997,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6
1265,Kansas,2016,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7
1039,South Carolina,2011,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12
1173,Mississippi,2014,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
471,Michigan,2000,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,26
197,West Virginia,1994,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,12
1293,Utah,2016,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,11



Total number of rows in df: 1350


## Gun violence, https://www.kaggle.com/datasets/nidzsharma/us-mass-shootings-19822023

In [12]:
# Read the CSV data
df = pd.read_csv('/Users/a/Desktop/ClassFolder_040123/final_project/shooting-1982-2023.csv')

# Display the first few rows of the DataFrame
display(df.sample(30))
print("\nTotal number of rows in df:", df.shape[0])

Unnamed: 0,case,location,date,summary,fatalities,injured,total_victims,location.1,age_of_shooter,prior_signs_mental_health_issues,mental_health_details,weapons_obtained_legally,where_obtained,weapon_type,weapon_details,race,gender,latitude,longitude,type,year
93,Carthage nursing home shooting,"Carthage, North Carolina",29-03-2009,"Robert Stewart, 45, opened fire at a nursing h...",8,3,11,Other,45,Yes,His estranged wife told her workplace that her...,Yes,Local sporting goods stores and individuals,"One revolver, one shotgun",Winchester 1300 pump-action shotgun; .357 Magn...,white,Male,35.333434,-79.414592,Mass,2009
58,Baton Rouge police shooting,"Baton Rouge, Lousiana",17-07-2016,"Gavin Long, 29, a former Marine who served in ...",3,3,6,Other,29,Yes,Unclear,Unknown,-,Two semiautomatic rifles; one semiautomatic ha...,"IWI Tavor SAR 5.56 caliber rifle, Springfield ...",Black,M,30.433601,-91.081403,Spree,2016
111,Xerox killings,"Honolulu, Hawaii",02-11-1999,"Byran Koji Uyesugi, 40, a Xerox service techni...",7,0,7,Workplace,40,Yes,"A psychiatrist, testifying for the prosecutio...",Yes,Hunting Supplies of Hawaii (The Armory) in Hon...,One semiautomatic handgun,9mm Glock 17 semiautomatic handgun,Asian,Male,21.320063,-157.876462,Spree,1999
89,Hartford Beer Distributor shooting,"Manchester, Connecticut",03-08-2010,"Omar S. Thornton, 34, shot up his Hartford Bee...",9,2,11,Workplace,34,No,He apparently was driven over the edge by unad...,Yes,"Gun dealer in East Windsor, Conn.",Two semiautomatic handguns,Two 9mm Ruger SR9 semiautomatic handguns,black,Male,41.798764,-72.570068,Mass,2010
85,Su Jung Health Sauna shooting,"Norcross, Georgia",21-02-2012,"Jeong Soo Paek, 59, returned to a Korean spa f...",5,0,5,Other,59,Yes,His sister worried about his homicidal tendenc...,Yes,Unknown,One semiautomatic handgun,.45-caliber semiautomatic handgun,Asian,Male,33.9412127,-84.2135309,Mass,2012
4,Virginia Walmart shooting,"Chesapeake, Virginia",11-22-22,"Andre Bing, 31, who worked as a supervisor at ...",6,6,12,Workplace,31,-,-,-,-,semiautomatic handgun,-,Black,M,-,-,Mass,2022
37,Rite Aid warehouse shooting,"Perryman, Maryland",9-20-18,"Snochia Moseley, 26, reportedly a disgruntled ...",3,3,6,Workplace,26,-,-,Yes,-,semiautomatic handgun,Glock 9 mm,Black,F,39.455658,-76.208485,Mass,2018
102,Capitol Hill massacre,"Seattle, Washington",25-03-2006,"Kyle Aaron Huff, 28, opened fire at a rave aft...",7,2,9,Other,28,No,Police were unable to find any record that he ...,Yes,"Various sporting goods stores in Kalispell, Mont.","Two semiautomatic handguns, one rifle (assault...",".40-caliber Ruger, one other semiautomatic han...",white,Male,47.6229,-122.3165,Mass,2006
139,Dallas nightclub shooting,"Dallas, Texas",29-06-1984,"Abdelkrim Belachheb, 39, opened fire at an ups...",6,1,7,Other,39,Yes,"During his last meal with his wife, he confess...",No,"Hines Boulevard Pawn Shop in Dallas, Texas",One semiautomatic handgun,9mm Smith & Wesson 459 semiautomatic handgun,white,Male,32.925166,-96.838676,Mass,1984
33,SunTrust bank shooting,"Sebring, Florida",1-23-19,"Zephen A. Xaver, 21, fatally shot five women i...",5,0,5,Workplace,21,Yes,Xaver was reported to have a lenghty history o...,Yes,-,handgun,9 mm handgun,White,M,27.471043,-81.45847,Mass,2019



Total number of rows in df: 141


## firearms around the world, https://worldpopulationreview.com/country-rankings/gun-ownership-by-country

In [13]:
# Read the JSON data
df = pd.read_json('/Users/a/Desktop/ClassFolder_040123/final_project/gun-ownership-by-country-2023.json')

# Display the first few rows of the DataFrame
display(df.sample(30))
print("\nTotal number of rows in df:", df.shape[0])


Unnamed: 0,country,firearms,per100,military,lawEnf
72,Latvia,205000,10.5,27936.0,17000.0
176,Tuvalu,100,1.2,,26.0
96,Dominican Republic,795000,7.4,106495.0,46000.0
166,Bolivia,218000,2.0,87590.0,42000.0
116,Grenada,5000,4.6,,1000.0
212,Timor-Leste,3000,0.3,2527.0,4000.0
61,Equatorial Guinea,112000,12.5,2760.0,2000.0
17,Malta,119000,28.3,5547.0,1637.0
123,Botswana,97000,4.1,17100.0,11000.0
183,Burkina Faso,175000,0.9,43780.0,43000.0



Total number of rows in df: 217


## https://www.theviolenceproject.org/

In [14]:
# load excel file
xls = pd.ExcelFile('/Users/a/Desktop/ClassFolder_040123/final_project/Violence Project Mass Shooter Database - Version 6.1.xlsx')

# read all sheets to a dictionary
data = pd.read_excel(xls, sheet_name=None, header=1)

# data is now a dictionary where each key-value pair corresponds to a sheet
# you can access each DataFrame like so:
full_database_df = data['Full Database']


# display the various dfs
print("\nTotal number of rows in full_database_df:", full_database_df.shape[0])
display(full_database_df.sample(20))




Total number of rows in full_database_df: 189


Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Day of Week,Day,Month,Year,Street Number,Street Name,City,State,County,Zip Code,Latitude,Longitude,State Code,Region,Urban/Suburban/Rural,Metro/Micro Statistical Area Type,Location,Insider or Outsider,Workplace Shooting,Multiple Locations,Other Location,...,Motive: Unknown,Role of Psychosis in the Shooting,Social Media Use,Leakage,Leakage How,Leakage Who,Leakage Specific/Nonspecific,Interest in Past Mass Violence,Relationship with Other Shooting(s),Specify Relationship to Other Shooting(s),Legacy Token,Pop Culture Connection,Specify Pop Culture Connection,Planning,Performance,Interest in Firearms,Firearm Proficiency,Total Firearms Brought to the Scene,Other Weapons or Gear,Specify Other Weapons or Gear,On-Scene Outcome,Who Killed Shooter On Scene,Attempt to Flee,Insanity Defense,Criminal Sentence
188,190,Bing,Andre,11/22/2022,Tuesday,22,11,2022,1521,Sam's Cir,Chesapeake,VA,City of Chesapeake,23320,36.772622,-76.251284,46,0,0,metropolitan,4,1,1,0,,...,1.0,2.0,0.0,0.0,,,0.0,0.0,1.0,Two days after Club Q shooting,1.0,0.0,,1.0,0.0,0.0,,1.0,1.0,Several additional magazines,0.0,1.0,0.0,2.0,0.0
147,149,Cetin,Arcan,9/23/2016,Friday,23,9,2016,201,Cascade Mall Dr,Burlington,WA,Skagit County,98233,48.464723,-122.337055,47,3,1,metropolitan,4,0,0,0,,...,1.0,0.0,1.0,0.0,,,,1.0,1.0,Initially tried to emulate James Holmes by sho...,0.0,0.0,,0.0,0.0,1.0,1.0,1.0,0.0,,3.0,0.0,0.0,2.0,0.0
5,6,White,Joseph,9/23/1970,Wednesday,23,9,1970,Building 12,W. Averell Harriman State Office Campus,Albany,NY,Albany County,12240,42.614852,-73.970812,32,2,0,metropolitan,6,1,1,0,,...,1.0,0.0,2.0,0.0,,,,0.0,0.0,,1.0,0.0,,0.0,0.0,0.0,2.0,1.0,0.0,,0.0,1.0,0.0,2.0,0.0
7,8,Grace,Edwin,6/21/1972,Wednesday,21,6,1972,383,Kings Hwy,Cherry Hill,NJ,Camden County,8034,39.927991,-74.994341,30,2,1,metropolitan,6,1,1,0,,...,0.0,0.0,2.0,0.0,,,,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,3.0,2.0,0.0,,0.0,1.0,0.0,2.0,0.0
170,172,Anderson,David,12/10/2019,Tuesday,10,12,2019,223,Martin Luther King Jr Dr,Jersey City,NJ,Hudson County,7305,40.707135,-74.083691,30,2,0,metropolitan,4,0,0,1,8.0,...,0.0,0.0,1.0,0.0,,,,1.0,0.0,,1.0,0.0,,1.0,0.0,0.0,3.0,5.0,1.0,"Pipe bomb, homemade silencer, homemade device ...",1.0,2.0,0.0,2.0,0.0
141,142,Farook,Syed Rizwan,12/2/2015,Wednesday,2,12,2015,1365,S Waterman Ave,San Bernardino,CA,San Bernardino County,92408,34.075766,-117.277467,5,3,0,metropolitan,6,1,1,1,8.0,...,0.0,0.0,1.0,0.0,,,,1.0,0.0,,1.0,0.0,,1.0,0.0,0.0,2.0,4.0,1.0,"load-bearing vest, bombs",1.0,2.0,0.0,2.0,0.0
64,65,Torres,Arturo,12/18/1997,Thursday,18,12,1997,1808,N Batavia St,Orange,CA,Orange County,92865,33.817491,-117.862546,5,3,1,metropolitan,9,1,1,0,,...,0.0,0.0,2.0,0.0,,,,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,3.0,3.0,0.0,,1.0,2.0,0.0,2.0,0.0
146,148,Johnson,Micah,7/7/2016,Thursday,7,7,2016,801,Main St,Dallas,TX,Dallas County,75202,32.779742,-96.805298,43,0,0,metropolitan,8,0,0,0,,...,0.0,0.0,1.0,1.0,4.0,9.0,0.0,1.0,0.0,,0.0,2.0,His Facebook had a photo with a member of the ...,1.0,0.0,1.0,3.0,3.0,1.0,"bombs, bulletproof vests at home",1.0,2.0,0.0,2.0,0.0
89,90,Brown,Elijah,7/2/2004,Friday,2,7,2004,4612,Speaker Rd,Kansas City,KS,Wyandotte County,66106,39.096158,-94.684427,16,1,0,metropolitan,9,1,1,0,,...,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,2.0,0.0,,0.0,1.0,0.0,2.0,0.0
66,67,Golden,Andrew,3/24/1998,Tuesday,24,3,1998,1800,Highway 91 W,Jonesboro,AR,Craighead County,72404,35.857012,-90.805986,4,0,2,metropolitan,0,1,0,0,,...,0.0,0.0,2.0,1.0,0.0,6.0,1.0,0.0,0.0,,0.0,0.0,,1.0,0.0,1.0,3.0,10.0,1.0,"crossbow, machete, military belt, knives",2.0,0.0,1.0,1.0,5.0


In [15]:
# load excel file
xls = pd.ExcelFile('/Users/a/Desktop/ClassFolder_040123/final_project/Violence Project Mass Shooter Database - Version 6.1.xlsx')

# read all sheets to a dictionary
data = pd.read_excel(xls, sheet_name=None)

# data is now a dictionary where each key-value pair corresponds to a sheet
# you can access each DataFrame like so:
firearms_data_df = data['Firearms Data']
victims_df = data['Victims Data']
community_df = data['Community Data']
trends_df = data['Trend Data']


# display the various dfs
print("\nTotal number of rows in firearms_data_df:", firearms_data_df.shape[0])
display(firearms_data_df.sample(20))

print("\nTotal number of rows in victims_df:", victims_df.shape[0])
display(victims_df.sample(20))

print("\nTotal number of rows in community_df:", community_df.shape[0])
display(community_df.sample(20))

print("\nTotal number of rows in trends_df:", trends_df.shape[0])
display(trends_df.sample(20))



Total number of rows in firearms_data_df: 404


Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Make and Model,Classification,Caliber,Used in Shooting?,Modified,Large Capacity Magazine,Extended Magazine,When Obtained,Legal Purchase,Illegal Purchase,Assembled with Legal Parts,Gifted,Theft,Unknown
354,161,Pagourtzis,Dimitrios,2018-05-18 00:00:00,.38-caliber Rossi revolver,0.0,2.0,1.0,0.0,0.0,,0.0,,,0.0,0.0,1.0,0.0
225,109,Zamora,Isaac,2008-09-02 00:00:00,Winchester lever-action rifle,2.0,,1.0,0.0,0.0,0.0,0.0,,,0.0,0.0,2.0,0.0
180,87,Williams,Doug,2003-07-08 00:00:00,12-gauge shotgun,1.0,2.0,1.0,0.0,0.0,,,2.0,0.0,0.0,0.0,0.0,0.0
256,124,Goh,One,2012-04-02 00:00:00,.45-caliber pistol,0.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
116,59,Vernon,Michael,1995-12-19 00:00:00,9mm CZ Model 85 pistol,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0
302,144,Dalton,Jason,2016-02-20 00:00:00,9mm Glock 19 pistol,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
266,129,Lanza,Adam,2012-12-14 00:00:00,10mm Glock 20 pistol,0.0,2.0,2.0,0.0,1.0,0.0,0.0,,,0.0,0.0,1.0,0.0
3,1,Whitman,Charles,1966-08-01 00:00:00,6.35mm Galesi-brescia pistol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
356,163,Casarez,Javier,2018-09-12 00:00:00,.50-caliber Smith & Wesson 500 revolver,0.0,,1.0,0.0,0.0,,1.0,3.0,0.0,0.0,0.0,0.0,0.0
1,1,Whitman,Charles,1966-08-01 00:00:00,Remington .35-caliber Model 141 pump-action rifle,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0



Total number of rows in victims_df: 1325


Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Victim Name,Age,Gender,Race,Knew Shooter,"If Known, Relationship to Shooter",Relationship to Shooter,Life Expectancy,Years Lost
164,29,Huberty,James,7/18/1984,Neva Denise Caine,22.0,1.0,0.0,0.0,,0.0,75.6,53.6
345,56,Ferguson,Rodriguez,12/31/1994,Jamie Hunter,39.0,0.0,,0.0,,0.0,66.6,27.6
761,126,Holmes,James,7/20/2012,Veronica Moser-Sullivan,6.0,1.0,0.0,0.0,,0.0,80.7,74.7
1083,158,Cruz,Nikolas,2/14/2018,Alaina Petty,14.0,1.0,0.0,0.0,,0.0,80.5,66.5
1159,168,Craddock,DeWayne,5/31/2019,Christopher Kelly Rapp,54.0,0.0,0.0,1.0,coworker,3.0,68.0,14.0
274,41,Hennard,George,10/16/1991,Nancy Faye Hedgepeth Stansbury,44.0,1.0,0.0,0.0,,0.0,72.2,28.2
226,36,Henderson,Clemmie,9/22/1988,Irma C. Ruiz,40.0,1.0,2.0,0.0,,0.0,71.1,31.1
1096,160,Reinking,Travis,4/22/2018,Taurean C. Sanderlin,29.0,0.0,1.0,0.0,,0.0,64.5,35.5
679,113,Hasan,Nidal,11/5/2009,Jason Dean Hunt,22.0,0.0,0.0,2.0,stationed at Fort Hood,5.0,72.7,50.7
588,102,Cho,Seung-Hui,4/16/2007,Jeremy Michael Herbstritt,27.0,0.0,0.0,2.0,classmate,4.0,70.7,43.7



Total number of rows in community_df: 130


Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Location (Code),Location (Specify),Shooting Start Time,Shooting End Time,Time of Day,Zip Code,Total Population,Median Age,% White Alone,% Female Household,% Rental Units,% Employed,% High School Graduate,% College Graduate,% Without Health Insurance,Nearest Hospital (Miles),N Mental Health Providers in Zip Code,N Gun Stores in Zip Code,Size of Police Dept,Homicide Rate
64,123,Dekraai,Scott,4,Retail,13:21:00,13:26:00,1.0,90740,23729,57.5,83.2,6.1,25.2,94.4,42.3,12.6,,5.8,1.0,0.0,36.0,4.1
63,122,Sencion,Eduardo,5,Restaurant / Bar / Nightclub,08:58:00,9:06,0.0,89701,27625,40.2,79.3,12.3,41.4,52.8,36.6,39.1,23.6,5.5,5.0,2.0,,
29,88,Tapia,Salvador,6,Workplace,08:37:00,,0.0,60609,2896016,31.5,42.0,18.9,56.2,55.1,71.8,25.5,,,,,13400.0,22.04
110,171,Ator,Seth,8,Driving,15:17:00,17:30:00,1.0,79762,44014,31.0,95.5,14.9,43.3,68.5,84.3,22.7,18.5,3.7,1.0,2.0,170.0,9.7
26,85,Lockey,William,6,Workplace,08:00:00,,0.0,46628,26726,34.0,63.7,18.3,30.2,62.9,79.3,18.4,,3.8,1.0,0.0,255.0,19.37
86,147,Mateen,Omar,5,Restaurant / Bar / Nightclub,01:58:00,05:15:00,3.0,32806,26797,38.6,83.9,11.0,35.9,64.9,28.5,21.3,16.8,0.6,1.0,2.0,697.0,5.8
103,164,Bowers,Robert,3,House of Worship,09:54:00,11:08:00,0.0,15217,28049,34.5,80.2,4.4,48.5,62.3,96.7,72.1,3.7,,3.0,0.0,1100.0,22.4
120,181,Cassidy,Samuel,6,Workplace,6:34,6:40,0.0,95110,19928,32.4,20.5,11.2,61.3,68.4,75.7,37.4,8.22,3.0,4.0,0.0,959.0,4.52
80,139,Abdulazeez,Muhammad,2,Military Site,10:45:00,11:15:00,0.0,37406,14627,35.3,16.6,34.5,53.7,43.5,43.4,2.0,13.8,4.9,1.0,0.0,477.0,15.5
14,73,Floyd,Zane,4,Retail,05:16:00,05:24:00,0.0,89119,49445,33.2,51.2,12.4,78.4,62.9,34.2,2.6,,0.6,5.0,0.0,2353.0,12.8



Total number of rows in trends_df: 85


Unnamed: 0,Year,U.S. Population (Census),Total U.S. Murders (FBI UCR),U.S. Murder Rate per Million,Mass Shootings (Incidents),Mass Shooting Rate Per Million,Mass Shooting Yearly Growth Rate (Incidents),Mass Shooting 3 Year Average (Incidents),Mass Shooting 5 Year Average (Incidents),Mass Shooting 10 Year Average (Incidents),Mass Shooting Deaths (Total),Deaths per Mass Shooting (Mean),Mass Shooting Death Rate per Million,Mass Shooting 3 Year Average (Deaths),Mass Shooting 5 Year Average (Deaths),Mass Shooting 10 Year Average (Deaths)
74,,,,,11,"Austin, TX. Aug. 1, 1966",15.0,,,,,,,,,
3,1969.0,202676946,14760.0,72.825254,1,0.004934,2.18,1.0,,,4.0,4.0,0.019736,5.666667,,
76,,,,,13,"Edmond, OK. Aug. 20, 1986",14.0,,,,,,,,,
84,,,,,21,"Virginia Beach, VA. May 31, 2019",12.0,,,,,,,,,
12,1978.0,222584545,19560.0,87.876721,1,0.004493,2.83,1.666667,1.5,1.375,5.0,5.0,0.022463,9.666667,8.5,7.375
1,1967.0,198712056,12240.0,61.596665,1,0.005032,2.06,,,,6.0,6.0,0.030194,,,
24,1990.0,249464396,23440.0,93.961304,1,0.004009,4.0,2.0,1.6,2.1,11.0,11.0,0.044094,13.333333,12.0,14.7
50,2016.0,323071342,17413.0,53.898312,5,0.015476,8.49,4.333333,4.8,4.7,70.0,14.0,0.21667,42.333333,44.2,38.8
36,2002.0,287973924,16229.0,56.355797,1,0.003473,5.67,2.666667,3.6,3.5,4.0,4.0,0.01389,12.666667,20.6,18.2
71,,Total,185.0,,8,"Uvalde, TX, May 24, 2022",21.0,,,,,,,,,
