# Load Modules Using Import

In [None]:
# import pandas
import pandas as pd

# import matplotlib
import matplotlib.pyplot as plt

In [None]:
import geopandas as gpd
import warnings
import matplotlib.pyplot as plt
# import geoplot
from shapely.geometry import Point

warnings.filterwarnings('ignore')

In [None]:
# import plotly express
import plotly.express as px

# import plotly graph objects
import plotly.graph_objects as go

# Exploratory Data Analysis/Visualization With Combined ND Football Schedules

## Load Schedules as Pandas DataFrame

In [None]:
# load the combined schedule CSV into a dataframe
# NOTE(review): parse_dates=True asks pandas to try parsing the *index* as dates, but
# index_col=None means no CSV column is used as the index -- confirm whether
# parse_dates is needed here (the date column is converted explicitly in the next cell)
football = pd.read_csv("combined_nd_schedules_cleaned.csv", index_col=None, parse_dates=True)

# check data has loaded (first five rows)
football.head()

In [None]:
# create datetime object from Standardized_Date field
football['Datetime'] = pd.to_datetime(football['Standardized_Date'])

# check updated dataframe
football.head()

In [None]:
# get list of dataframe columns
football.info()

## Static Data Visualization With Pandas and Matplotlib

In [None]:
# basic syntax for plotting from dataframe; default is all numeric fields and a line plot
football.plot()

In [None]:
# basic syntax for plotting single column from dataframe
football['Datetime'].plot()

In [None]:
# bar plot with 'Season' on the X axis and number of wins 'W' on the Y axis
football.plot.bar(x="Season", y="W")

### Plotting Categorical Data Using `.groupby()` and `.size()`

In [None]:
# bar plot with number of games for each day of the week
football.groupby('Day').size().plot(kind='bar')

In [None]:
# bar plot with overall number of wins/losses/ties 
football.groupby('Result').size().plot(kind='bar')

In [None]:
# bar plot with overall number of home/away/neutral site games 
football.groupby('Game_Type').size().plot(kind='bar')

#### Additional Resources on Plotting With `pandas` and `matplotlib`

- Prof. Walden's [Elements of Computing lab on visualizing data with `pandas` and `matplotlib`](https://github.com/kwaldenphd/more-with-matplotlib)
- `pandas`, [User Guide, "Visualization"](https://pandas.pydata.org/docs/user_guide/visualization.html)
- `pandas`, [Getting Started, "Plotting"](https://pandas.pydata.org/docs/getting_started/intro_tutorials/04_plotting.html)
- Chapter 9 "Plotting and Visualization" from Wes McKinney, [*Python for Data Analysis: Data Wrangling With pandas, Numpy, and IPython*](https://www.oreilly.com/library/view/python-for-data/9781491957653/) (O'Reilly, 2017)

### Plotting Geospatial Data With `geopandas`, `shapely`, and `matplotlib`

**Free online geocoding services:**
- [LocalFocus data journalism batch geocoder](https://geocode.localfocus.nl/)
- [Texas A&M Geocoding Services](https://geoservices.tamu.edu/Services/Geocode/)
  * *Requires creating a free account*
  
**Installing and Configuring `geopandas`**:
- Anaconda
  * Tanish Gupta, "[Fastest Way to Install Geopandas in Jupyter Notebooks](https://medium.com/analytics-vidhya/fastest-way-to-install-geopandas-in-jupyter-notebook-on-windows-8f734e11fa2b)" *Analytics Vidhya* (6 December 2020)
  * Anaconda, "[conda-forge packages, geopandas](https://anaconda.org/conda-forge/geopandas)" *Anaconda documentation*
  * GeoPandas, "[Installation](https://geopandas.org/getting_started/install.html)" *GeoPandas documentation*
- Google CoLab
  * Abdishakur Hassan, Jupyter notebook on using `geopandas` in Google CoLab, from "[Geographic data science tutorials with Python](https://github.com/shakasom/GDS)" *GitHub repository*
    * [Google CoLab](https://colab.research.google.com/github/shakasom/GDS/blob/master/Part1%20-%20Introduction.ipynb)
    * [GitHub](https://github.com/shakasom/GDS/blob/master/Part1%20-%20Introduction.ipynb)
    * [NBViewer](https://nbviewer.jupyter.org/github/shakasom/GDS/blob/master/Part1%20-%20Introduction.ipynb)
    
**Getting Started With GeoPandas**:
- Jonathan Soma, "[Mapping with geopandas](https://jonathansoma.com/lede/foundations-2017/classes/geopandas/mapping-with-geopandas/)" from 2017 "[Foundations of Computing](https://jonathansoma.com/lede/foundations-2017/)" course, Columbia Graduate School of Journalism
- CoderzColumn, "[Plotting Static Maps with geopandas](https://coderzcolumn.com/tutorials/data-science/plotting-static-maps-with-geopandas-working-with-geospatial-data)" *CoderzColumn* (11 March 2020)
- GeoPandas, "[Plotting with Geoplot and GeoPandas](https://geopandas.org/gallery/plotting_with_geoplot.html)" *GeoPandas documentation*

In [None]:
# scatterplot of latitude and longitude data
football.plot(x= "Longitude", y="Latitude", kind='scatter')

<iframe width="100%" height="520" frameborder="0" src="https://kwalden.carto.com/builder/be217bb8-46f4-47a1-83dc-96ccd200e175/embed" allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>

In [None]:
# show available geopandas datasets (for basemaps)
# The import cell binds the package as `gpd`, so the bare name `geopandas` used here
# originally raised NameError. The list is printed because a bare expression that is
# not the last line of a cell produces no output.
# NOTE(review): `geopandas.datasets` was deprecated in GeoPandas 0.14 and removed in 1.0;
# on newer installs, download the Natural Earth file and pass its path to gpd.read_file.
print(gpd.datasets.available)

# world basemap from naturalearth_lowres geopandas dataset
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

# show basemap head
world.head()

In [None]:
# function that takes latitude and longitude columns from dataframe and creates Point field
def make_point(row):
    """Return a shapely Point for one dataframe row.

    The row must expose ``Longitude`` and ``Latitude`` attributes; longitude is
    passed as x and latitude as y.
    """
    return Point(row.Longitude, row.Latitude)

# build one Point per schedule row (axis=1 applies the function row by row)
points = football.apply(make_point, axis=1)

# create GeoDataFrame from football data and points geometry, declaring the
# coordinate system up front. The "EPSG:4326" authority string replaces the
# deprecated {'init': 'epsg:4326'} dict syntax.
football_map = gpd.GeoDataFrame(football, geometry=points, crs="EPSG:4326")

# show head of GeoDataFrame
football_map.head()

In [None]:
# preliminary cartesian coordinate plot of GeoDataFrame
football_map.plot(figsize=(20,5))

In [None]:
# draw the world basemap first; the axes it returns are reused for the point layer
ax = world.plot(
    color="lightgrey",
    edgecolor="white",
    linewidth=0.25,
    figsize=(15, 5),
)

# hide the axis frame and label the map
ax.axis('off')
ax.set_title("Geography of Notre Dame Football")

# overlay the football locations on the same axes, colored by season, with a legend
football_map.plot(
    ax=ax,
    column="Season",
    cmap='viridis',
    markersize=10,
    alpha=0.5,
    legend=True,
)

### Interactive Visualization With `plotly`

In [None]:
# stacked bar chart: season on the x axis, points on the y axis, bars colored by conference

# plotly express (imported again so this cell can run on its own)
import plotly.express as px

# build the bar chart
fig = px.bar(
    football,
    x="Season",
    y="Pts",
    color="Conf",
    title="Look at all the football!",
)

# treat the x axis as categorical
fig.update_xaxes(type='category')

# render the figure
fig.show()

In [None]:
# scatterplot with latitude and longitude, colored by conference
fig = px.scatter(football, x="Longitude", y="Latitude", color="Conf", hover_data=['Standardized_Opponent', 'Season'])

# show scatterplot
fig.show()

#### Interactive Maps with Plotly

##### GeoMaps or Outline-Based Maps

In [None]:
# plotly graph objects (imported again so this cell can run on its own)
import plotly.graph_objects as go

# scattergeo trace built with graph_objects syntax: one marker per row,
# hover text from the opponent column, marker color driven by season
fig = go.Figure(
    data=go.Scattergeo(
        lon=football["Longitude"],
        lat=football["Latitude"],
        text=football["Standardized_Opponent"],
        mode='markers',
        marker_color=football['Season'],
        marker_colorscale=px.colors.sequential.Greens,
    )
)

# fit the base map to the plotted locations and draw country borders
fig.update_geos(fitbounds="locations", showcountries=True)

# figure size, margins, and title
fig.update_layout(
    title="Look at all the football!",
    height=300,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)

# render the figure
fig.show()

In [None]:
# same scattergeo plot using plotly express syntax
fball_map = px.scatter_geo(football, lat="Latitude", lon="Longitude", color = football['Conf'], hover_data=['Standardized_Opponent', 'Season'])

fball_map.show()

##### Mapbox or Tile-Based Maps

In [None]:
# scatter_mapbox plot generated using plotly express syntax
fig = px.scatter_mapbox(football, lat="Latitude", lon="Longitude", color=football["Conf"],
                  color_continuous_scale=px.colors.cyclical.IceFire, mapbox_style="carto-positron")
# show figure
fig.show()

In [None]:
# tile-based scatter map of the schedule data, colored by conference
fig = px.scatter_mapbox(
    football,
    lat="Latitude",
    lon="Longitude",
    color=football["Conf"],
    color_continuous_scale=px.colors.cyclical.IceFire,
)

# switch the base map to OpenStreetMap tiles and remove the figure margins
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)

# render the figure
fig.show()

In [None]:
# tile-based scatter map with points colored by season;
# hovering shows the opponent as the title plus season and result
fig = px.scatter_mapbox(
    football,
    lat="Latitude",
    lon="Longitude",
    color="Season",
    color_continuous_scale=px.colors.sequential.Viridis,
    hover_name="Standardized_Opponent",
    hover_data=["Season", "Result"],
    mapbox_style="carto-positron",
    zoom=3,
    height=300,
)

# render the figure
fig.show()

#### Additional Resources on Interactive Visualization With `pandas` and `plotly`

- Prof. Walden's [Elements of Computing lab on interactive data visualization with `pandas` and `plotly`](https://github.com/kwaldenphd/interactive-visualization-python)
- [`plotly` documentation and tutorials](https://plotly.com/python/)
  * Statistical charts: [`plotly`, Plotly Python Open Source Graphing Library Statistical Charts](https://plotly.com/python/statistical-charts/)
  * Scientific charts: [`plotly`, Plotly Python Open Source Graphing Library Scientific Charts](https://plotly.com/python/scientific-charts/)
  * Financial charts: [`plotly`, Plotly Python Open Source Graphing Library Financial Charts](https://plotly.com/python/financial-charts/)
  * Maps: [`plotly`, Plotly Open Source Graphing Library Maps](https://plotly.com/python/maps/)
  * Full gallery of chart types: [`plotly`, Plotly Python Open Source Graphing Library](https://plotly.com/python/)

# Exploratory Data Analysis/Visualization With Single Year ND Directory

## Load Directory as Pandas DataFrame

In [None]:
# load data from file to dataframe
directory = pd.read_csv("ND_Directory_Cleaned_Geography.csv", index_col=None)

# check data has loaded
directory.head()

In [None]:
# get list of dataframe columns
directory.info()

## Static Data Visualization With Pandas and Matplotlib

### Plotting Categorical Data Using `.groupby()` and `.size()`

In [None]:
# horizontal bar plot with number of students by country
directory.groupby("Country").size().plot(kind='barh')

In [None]:
# bar chart with number of students by major
directory.groupby('Major').size().plot(kind='bar')

In [None]:
# horizontal bar chart with number of students by state
directory.groupby("State").size().plot(kind='barh')

### Plotting Geospatial Data With `geopandas`, `shapely`, and `matplotlib`

In [None]:
# scatterplot of latitude and longitude data
directory.plot(x= "Longitude", y="Latitude", kind='scatter')

In [None]:
# show available geopandas datasets (for basemaps)
# The import cell binds the package as `gpd`, so the bare name `geopandas` used here
# originally raised NameError. The list is printed because a bare expression that is
# not the last line of a cell produces no output.
# NOTE(review): `geopandas.datasets` was deprecated in GeoPandas 0.14 and removed in 1.0;
# on newer installs, download the Natural Earth file and pass its path to gpd.read_file.
print(gpd.datasets.available)

# world basemap from naturalearth_lowres geopandas dataset
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

# show basemap head
world.head()

In [None]:
# function that takes latitude and longitude columns from dataframe and creates Point field
def make_point(row):
    """Return a shapely Point for one dataframe row.

    The row must expose ``Longitude`` and ``Latitude`` attributes; longitude is
    passed as x and latitude as y.
    """
    return Point(row.Longitude, row.Latitude)

# build one Point per directory row (axis=1 applies the function row by row)
points = directory.apply(make_point, axis=1)

# create GeoDataFrame from directory data and points geometry, declaring the
# coordinate system up front. The "EPSG:4326" authority string replaces the
# deprecated {'init': 'epsg:4326'} dict syntax.
directory_map = gpd.GeoDataFrame(directory, geometry=points, crs="EPSG:4326")

# show head of GeoDataFrame
directory_map.head()

In [None]:
# preliminary cartesian coordinate plot of GeoDataFrame
directory_map.plot(figsize=(20,5))

In [None]:
# draw the world basemap first; the axes it returns are reused for the point layer
ax = world.plot(
    color="lightgrey",
    edgecolor="white",
    linewidth=0.25,
    figsize=(15, 5),
)

# hide the axis frame and label the map
ax.axis('off')
ax.set_title("Geography of Notre Dame Student Body, 1922-1923")

# overlay the directory locations on the same axes, colored by major, with a legend
directory_map.plot(
    ax=ax,
    column="Major",
    cmap='viridis',
    markersize=10,
    alpha=0.5,
    legend=True,
)

### Interactive Visualization With `plotly`

In [None]:
# bar chart showing number of students by major

# generate bar chart
fig = px.bar(directory, x='Major')

# update xaxis to categorical variable
fig.update_xaxes(type="category")

# show figure
fig.show()

In [None]:
# horizontal bar chart showing number of students by state

# generate horizontal bar chart
fig = px.bar(directory, y="State", orientation="h")

# show figure
fig.show()

In [None]:
# horizontal bar chart showing number of students by country

# generate horizontal bar chart
fig = px.bar(directory, y="Country", orientation="h")

# show figure
fig.show()

#### Interactive Maps with Plotly

##### Outline or Geo-Based Maps

In [None]:
# filter dataframe to remove rows with NA values
# (a row is dropped if ANY column is missing; `directory` is modified in place)
directory.dropna(axis='rows', inplace=True)

# cast latitude and longitude columns to numeric with one vectorized call per column
# (Series.apply(pd.to_numeric) converted the values one element at a time)
directory['Latitude'] = pd.to_numeric(directory['Latitude'])
directory['Longitude'] = pd.to_numeric(directory['Longitude'])

# show updated dataframe info
directory.info()

In [None]:
# world-scale scatter_geo map of the directory, built with plotly express

# one marker per directory row, colored by major; hover shows name, major, and city/state
directory_map = px.scatter_geo(
    directory,
    lat="Latitude",
    lon="Longitude",
    color=directory['Major'],
    hover_data=['First_Name', 'Last_Name', 'Major', 'Standardized_City_State'],
)

# hide the default base-map layers and draw country borders only
directory_map.update_geos(showcountries=True, visible=False)

# render the figure
directory_map.show()

In [None]:
# US-scoped scatter_geo map of the directory, built with plotly express

# one marker per directory row, colored by major; hover shows name, major, and city/state
directory_map = px.scatter_geo(
    directory,
    lat="Latitude",
    lon="Longitude",
    color=directory['Major'],
    hover_data=['First_Name', 'Last_Name', 'Major', 'Standardized_City_State'],
)

# restrict the map to the USA scope, hide the default base-map layers,
# and draw country borders plus black subunit borders
directory_map.update_geos(
    scope='usa',
    visible=False,
    showcountries=True,
    showsubunits=True,
    subunitcolor="Black",
)

# render the figure
directory_map.show()

##### Mapbox or Tile-Based Maps

In [None]:
# scatter_mapbox plot generated using plotly express syntax
fig = px.scatter_mapbox(directory, lat="Latitude", lon="Longitude", color=directory["Major"],
                        mapbox_style="carto-positron")
# show figure
fig.show()

In [None]:
# tile-based scatter map of the directory data, colored by major
fig = px.scatter_mapbox(
    directory,
    lat="Latitude",
    lon="Longitude",
    color=directory["Major"],
)

# switch the base map to OpenStreetMap tiles and remove the figure margins
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)

# render the figure
fig.show()

# Exploratory Data Analysis/Visualization With Combined ND Football Rosters

## Load Rosters as Pandas DataFrame

In [None]:
# load data from file to dataframe
rosters = pd.read_csv("combined_nd_rosters.csv", index_col=None)

# check data has loaded
rosters.head()

In [None]:
# create datetime object from the Season field
# Season is treated as a four-digit year (a later cell filters it with
# isin([1923, 1924])). Without an explicit format, pd.to_datetime reads integers
# as nanoseconds since the Unix epoch, which put every row in 1970; format='%Y'
# parses each value as a year and yields January 1 of that season.
rosters['Datetime'] = pd.to_datetime(rosters['Season'], format='%Y')

# check updated dataframe
rosters.head()

In [None]:
# get list of dataframe columns
rosters.info()

## Static Data Visualization With Pandas and Matplotlib

In [None]:
# basic syntax for plotting from dataframe; default is all numeric fields and a line plot
rosters.plot()

In [None]:
# basic syntax for plotting single column from dataframe
rosters['Season'].plot()

In [None]:
# bar plot with 'Season' on the X axis and number of games 'G' on the Y axis
rosters.plot.bar(x="Season", y="G")

In [None]:
# scatterplot with 'Season' on the X axis and the 'RushingTD' column on the Y axis
rosters.plot.scatter(x='Season', y='RushingTD')

In [None]:
# area plot with 'Season' on the X axis and rushing/receiving average columns on the Y axis
rosters.plot.area(x='Season', y=['RushingAvg', 'ReceivingAvg'], stacked=False)

### Interactive Visualization With `plotly`

In [None]:
# bar chart showing rushing average by season

# generate bar chart
fig = px.bar(rosters, x='Season', y='RushingAvg')

# update xaxis to categorical variable
fig.update_xaxes(type="category")

# show figure
fig.show()

In [None]:
# bar chart of the per-season count of roster rows that have a 'G' value
# NOTE(review): groupby('Season').count() returns the number of non-null entries per
# column for each season, so y='G' is a row count, not the sum of games played --
# confirm that a count (rather than .sum()) is what this chart should show

# generate bar chart (only y is given, so the grouped frame's Season index supplies x)
fig = px.bar(rosters.groupby('Season').count(), y='G')

# update xaxis to categorical variable
fig.update_xaxes(type="category")

# update figure fill color
fig.update_traces(marker=dict(color='green'))

# show figure
fig.show()

## Merging Roster Data and Directory Data

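Sample workflow for merging directory data and roster data for selected seasons. The code below filters the rosters to the 1923 and 1924 seasons before joining them to the directory on first and last name.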

In [None]:
# create new dataframe with roster from select seasons
single_yr_roster = rosters[rosters['Season'].isin([1923, 1924])]

# show new dataframe
single_yr_roster.head()

In [None]:
# new roster dataframe info
single_yr_roster.info()

In [None]:
# show directory dataframe
directory.head()

In [None]:
# directory dataframe info
directory.info()

In [None]:
# SQL-style left join: keep every row of the selected roster and attach the
# matching directory columns where first and last name both agree
merged_df = single_yr_roster.merge(
    directory,
    how='left',
    left_on=['First_Name', 'Last_Name'],
    right_on=['First_Name', 'Last_Name'],
)

# display the merged dataframe
merged_df

In [None]:
# show merged_df info
merged_df.info()

## Static Data Visualization With Pandas and Matplotlib

### Plotting Categorical Data Using `.groupby()` and `.size()`

In [None]:
# vertical bar plot with number of merged roster rows for each state
merged_df.groupby("State").size().plot(kind='bar')

In [None]:
# bar chart with number of students by major
merged_df.groupby('Major').size().plot(kind='bar')

In [None]:
# vertical bar plot with number of merged roster rows for each city
merged_df.groupby("City").size().plot(kind='bar')

### Plotting Geospatial Data With `geopandas`, `shapely`, and `matplotlib`

In [None]:
from matplotlib import pyplot as plt

# scatterplot of latitude and longitude data
merged_df.plot(x= "Longitude", y="Latitude", kind='scatter')

In [None]:
# show available geopandas datasets (for basemaps)
# The import cell binds the package as `gpd`, so the bare name `geopandas` used here
# originally raised NameError. The list is printed because a bare expression that is
# not the last line of a cell produces no output.
# NOTE(review): `geopandas.datasets` was deprecated in GeoPandas 0.14 and removed in 1.0;
# on newer installs, download the Natural Earth file and pass its path to gpd.read_file.
print(gpd.datasets.available)

# world basemap from naturalearth_lowres geopandas dataset
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

# show basemap head
world.head()

In [None]:
# function that takes latitude and longitude columns from dataframe and creates Point field
def make_point(row):
    """Return a shapely Point for one dataframe row.

    The row must expose ``Longitude`` and ``Latitude`` attributes; longitude is
    passed as x and latitude as y.
    """
    return Point(row.Longitude, row.Latitude)

# build one Point per merged roster/directory row (axis=1 applies the function row by row)
points = merged_df.apply(make_point, axis=1)

# create GeoDataFrame from the merged roster/directory data and points geometry,
# declaring the coordinate system up front. The "EPSG:4326" authority string
# replaces the deprecated {'init': 'epsg:4326'} dict syntax.
merged_df_map = gpd.GeoDataFrame(merged_df, geometry=points, crs="EPSG:4326")

# show head of GeoDataFrame
merged_df_map.head()

In [None]:
# preliminary cartesian coordinate plot of GeoDataFrame
merged_df_map.plot(figsize=(20,5))

In [None]:
# draw the world basemap first; the axes it returns are reused for the point layer
ax = world.plot(
    color="lightgrey",
    edgecolor="white",
    linewidth=0.25,
    figsize=(15, 5),
)

# hide the axis frame and label the map
ax.axis('off')
ax.set_title("Geography of Notre Dame Football Student Athletes, 1922-1923")

# overlay the merged roster/directory locations on the same axes, colored by major
merged_df_map.plot(
    ax=ax,
    column="Major",
    cmap='viridis',
    markersize=10,
    alpha=0.5,
    legend=True,
)

### Interactive Visualization With `plotly`

In [None]:
# bar chart showing number of players by major

# generate bar chart
fig = px.bar(merged_df, x='Major')

# update xaxis to categorical variable
fig.update_xaxes(type="category")

# show figure
fig.show()

In [None]:
# horizontal bar chart showing number of players by state

# generate horizontal bar chart; orientation is stated explicitly so this cell matches
# the other horizontal charts in the notebook instead of relying on plotly's inference
fig = px.bar(merged_df, y="State", orientation="h")

# show figure
fig.show()

In [None]:
# horizontal chart showing number of players by city

# generate horizontal bar chart
fig = px.bar(merged_df, y="City", orientation="h")

# show figure
fig.show()

#### Interactive Maps with Plotly

##### Outline or Geo-Based Maps

In [None]:
# filter dataframe to remove rows with NA values
merged_df.dropna(axis='rows', inplace=True)

# show updated dataframe info
merged_df.info()

In [None]:
# world-scale scatter_geo map of the merged roster/directory data, built with plotly express

# one marker per merged row, colored by major; hover shows name, major, and city/state
roster_map = px.scatter_geo(
    merged_df,
    lat="Latitude",
    lon="Longitude",
    color=merged_df['Major'],
    hover_data=['First_Name', 'Last_Name', 'Major', 'Standardized_City_State'],
)

# hide the default base-map layers and draw country borders only
roster_map.update_geos(showcountries=True, visible=False)

# render the figure
roster_map.show()

In [None]:
# US scatter_geo plot generated using plotly express syntax

# create figure
# The figure gets its own name: the original cell assigned it to `merged_df`, which
# replaced the merged DataFrame with a plotly Figure and broke every earlier
# merged_df cell (and this one) when re-run.
roster_us_map = px.scatter_geo(
    merged_df,
    lat="Latitude",
    lon="Longitude",
    color="Major",
    hover_data=['First_Name', 'Last_Name', 'Major', 'Standardized_City_State'],
)

# update figure geometry: USA scope, default base-map layers hidden,
# country borders plus black subunit borders drawn
roster_us_map.update_geos(visible=False, scope='usa', showcountries=True,
                          showsubunits=True, subunitcolor="Black")

# show figure
roster_us_map.show()