<a href="https://colab.research.google.com/github/kyledenis/data-visualisations/blob/master/data_visualisations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Downloading the data
!wget http://fryziggafl.net/static/fryziggafl.zip
!unzip fryziggafl.zip -o

# Load the data
afl_data = pd.read_csv('fryziggafl.csv')

# Columns that survive the pruning step; everything else is discarded.
kept_columns = [
    'match_home_team', 'match_home_team_score',
    'match_away_team', 'match_away_team_score',
    'match_date', 'match_round',
    'match_winner', 'match_margin',
]
columns_to_drop = set(afl_data.columns).difference(kept_columns)
afl_data = afl_data.drop(columns=list(columns_to_drop))

# Parse the date strings so the .dt accessor can be used below.
afl_data['match_date'] = pd.to_datetime(afl_data['match_date'])

# Label every row from the away team's point of view. The masks are applied in
# this order so that a later match overrides an earlier one ('draw' wins last);
# rows matching none of them stay 'unknown'.
winner = afl_data['match_winner']
afl_data['away_team_outcome'] = (
    pd.Series('unknown', index=afl_data.index)
    .mask(winner == afl_data['match_away_team'], 'won')
    .mask(winner == afl_data['match_home_team'], 'lost')
    .mask(winner == 'Draw', 'draw')
)

afl_data['year'] = afl_data['match_date'].dt.year

# For each distinct away score, the share of rows with each outcome
# (outcomes never observed for a score are filled with 0).
outcome_share_by_score = (
    afl_data
    .groupby('match_away_team_score')['away_team_outcome']
    .value_counts(normalize=True)
)
win_prob = outcome_share_by_score.unstack().fillna(0)

# Attach the 'won' share for each row's away score.
afl_data['win_probability'] = afl_data['match_away_team_score'].map(win_prob['won'])

# Restrict the analysis to matches played after 1 January 2000.
afl_post_2000 = afl_data[afl_data['match_date'] > '2000-01-01']

fig, ax = plt.subplots(figsize=(15, 10))

# Sorted so the viridis ramp and the legend both run chronologically,
# regardless of the row order in the source file.
years_2000 = afl_post_2000['year'].sort_values().unique()

# NOTE(review): 'win_probability' is looked up from a table pooled over every
# season, so the per-year fits differ only in which away scores occurred that
# year — confirm this is the intended analysis.
for position, year in enumerate(years_2000):
    yearly_data = afl_post_2000[afl_post_2000['year'] == year]

    # One colour per year, taken from its position in the sorted list.
    year_colour = plt.cm.viridis(position / len(years_2000))

    # `color` is passed at the top level so the scatter points AND the fitted
    # line share it. regplot attaches `label` to the scatter, so colouring only
    # the line (via line_kws) leaves the legend showing unrelated default-cycle
    # colours.
    sns.regplot(x='match_away_team_score',
                y='win_probability',
                data=yearly_data,
                color=year_colour,
                scatter_kws={'alpha': 0.5},
                logistic=True, ci=None, label=str(year),
                ax=ax)

ax.set_title('Yearly Analysis from 2000 onwards: Probability of Away Team Winning based on Their Score')
ax.set_ylabel('Probability of Winning')
ax.set_xlabel('Away Team Score')
ax.legend(title='Year')
plt.show()


In [None]:
import pandas as pd
import folium
import requests, zipfile, io

# State boundary polygons (GeoJSON) used as the choropleth geometry.
# raise_for_status() turns an HTTP error into an immediate, descriptive
# exception instead of a confusing JSON-decode failure further down; the
# timeout stops the cell from hanging indefinitely on a stalled connection.
states_response = requests.get(
    'https://raw.githubusercontent.com/tonywr71/GeoJson-Data/master/australian-states.json',
    timeout=60,
)
states_response.raise_for_status()
australian_states = states_response.json()

# 2021 Census datapack (zip) — downloaded into memory and extracted into the
# current working directory.
datapack_2021_zipped = requests.get(
    'https://www.abs.gov.au/census/find-census-data/datapacks/download/2021_GCP_STE_for_AUS_short-header.zip',
    timeout=120,
)
datapack_2021_zipped.raise_for_status()
# The context manager closes the archive once extraction is done.
with zipfile.ZipFile(io.BytesIO(datapack_2021_zipped.content)) as datapack_2021:
    datapack_2021.extractall()

# Table G01 from the extracted datapack.
census_data = pd.read_csv('./2021 Census GCP States and Territories for AUS/2021Census_G01_AUST_STE.csv')

census_data = (
    census_data
    # Shift the state codes down by one; they are later joined to the GeoJSON
    # on 'feature.id' — presumably those ids are zero-based (TODO confirm).
    .assign(STE_CODE_2021=lambda states: states['STE_CODE_2021'] - 1)
    # Remove the row labelled 8 — presumably the "Other Territories" entry,
    # which has no matching polygon (TODO confirm against the CSV).
    .drop(index=8)
    .assign(
        # Males per female in each state.
        male_to_female_ratio=lambda states: states['Tot_P_M'] / states['Tot_P_F'],
        # Percentage deviation of that ratio from parity (1.0).
        ratio_diff_percentage=lambda states: (states['male_to_female_ratio'] - 1) * 100,
    )
)

# Class boundaries (in percent) for the choropleth colour scale.
bins = [-10, -5, -1, 1, 5, 10]

# Base map
m_ratio = folium.Map(location=(-23.07, 132.08), zoom_start=5)

# Choropleth layer: each state polygon is coloured by its
# 'ratio_diff_percentage', joined to the GeoJSON through 'feature.id'.
ratio_layer = folium.Choropleth(
    geo_data=australian_states,
    data=census_data,
    columns=['STE_CODE_2021', 'ratio_diff_percentage'],
    key_on='feature.id',
    fill_color='PiYG',
    fill_opacity=0.7,
    line_opacity=0.2,
    bins=bins,
    legend_name='Difference from equal male-to-female ratio (%). Green = more males. Pink = more females',
)
ratio_layer.add_to(m_ratio)

# Leaving the map as the cell's last expression renders it inline.
m_ratio


In [None]:
import pandas as pd
import networkx as nx
import shutil

from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import Range1d, Circle, MultiLine
from bokeh.plotting import from_networkx

output_notebook()

# Download and unpack the dataset
!wget http://nrvis.com/download/data/misc/lesmis.zip
shutil.unpack_archive('lesmis.zip')

# Read the dataset
data = pd.read_csv('lesmis.mtx', sep=' ', skiprows=2, names=['n1', 'n2', 'weight'])

# Build an undirected graph with one edge per row of `data`, storing the third
# column as the 'weight' edge attribute.
# from_pandas_edgelist reads each column with its own dtype, whereas
# iterrows() yields one Series per row and does not preserve dtypes (integer
# node ids are upcast to floats whenever `weight` is float). It also adds the
# edges in row order, so node and edge ordering match the original loop.
G = nx.from_pandas_edgelist(data, source='n1', target='n2', edge_attr='weight')

# Node positions on a circle
layout = nx.circular_layout(G)

# Figure with identical, fixed x and y ranges so the circle is not cropped
title = "Les Misérables Character Interactions"
axis_limits = (-1.1, 1.1)
plot = figure(title=title,
              x_range=Range1d(*axis_limits),
              y_range=Range1d(*axis_limits))

# Bokeh graph renderer built from the NetworkX graph and its layout
network_graph = from_networkx(G, layout)

# Nodes: fixed-size sky-blue circles
network_graph.node_renderer.glyph = Circle(size=10, fill_color='skyblue')

# Edges: thickness encodes the interaction weight (scaled down by 5).
# The widths are stored as a 'line_width' column on the edge data source,
# which the MultiLine glyph then reads by column name.
edge_weights = [edge_weight / 5 for _start, _end, edge_weight in G.edges(data='weight')]
network_graph.edge_renderer.data_source.data['line_width'] = edge_weights
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.8, line_width='line_width')

# Add the customised graph renderer to the figure and display it
plot.renderers.append(network_graph)

show(plot)
