In [5]:
import altair as alt
import pandas as pd
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [6]:
data = pd.read_csv('../csv/video_games.csv')
data = data.dropna().copy(deep = True)

genre_data = data.assign(Genre=data['Metadata.Genres'].str.split(',')).explode('Genre')

genre_publisher_count = genre_data.groupby(['Genre']).size().reset_index(name='Count')

nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['Genre'], empty='none')

chart1 = alt.Chart(genre_publisher_count).mark_bar().encode(
    x='Count',
    y='Genre',
    color=alt.condition(nearest, 'Genre', alt.value('lightgrey')),
    tooltip=['Genre', alt.Tooltip('Count', title='Games in the Genre')]
).add_selection(
    nearest
).properties(
    title = 'The Number of Games for Each Genre',
    width=800,
    height=500
)


   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


### Contextual

In [7]:
vg_sales = pd.read_csv('../csv/vgsales.csv')
vg_sales = vg_sales.dropna()
vg_sales['Year'] = vg_sales['Year'].astype(int) 
vg_sales = vg_sales[(vg_sales['Year'] >= 2004) & (vg_sales['Year'] <= 2010)]
vg_sales1 = vg_sales[(vg_sales["Global_Sales"] >= 0.01)]
vg_sample = vg_sales1.copy(deep = True)
vg_sample["Year"] = vg_sample['Year'].astype(str) + "-01-01"
vg_sample["Year"] = pd.to_datetime(vg_sample["Year"])

genre_dropdown = alt.binding_select(options= [None] + list(vg_sample.Genre.unique()), labels = ['All'] + list(vg_sample.Genre.unique()))
genre_select = alt.selection_point(fields=["Genre"], bind=genre_dropdown, name = 'Genre')


chart2 = alt.Chart(vg_sample).mark_circle(size=100).encode(
    x = 'Year:T',
    y = 'Global_Sales',
    color= 'Genre',
    tooltip = ['Name', 'Platform', 'Publisher','Global_Sales']
).add_params(genre_select).transform_filter(genre_select).interactive().properties(
    title = 'Global Sales of Video Games Between 2004 and 2010 By Genre',
    width=800,
    height=500
)

In [8]:
file_path = '../csv/vgsales.csv'
data = pd.read_csv(file_path)

publisher_sales = data.groupby('Publisher')['Global_Sales'].sum().reset_index()

top_publishers = publisher_sales.sort_values(by='Global_Sales', ascending=False).head(10)

chart3 = alt.Chart(top_publishers).mark_bar().encode(
    x='Global_Sales:Q',
    y=alt.Y('Publisher:N', sort='-x'),
    color='Global_Sales:Q',
    tooltip=['Global_Sales']
).properties(
    width=800,
    height=500,
    title='Top 10 Video Game Publishers by Global Sales'
)


In [9]:
path = '../json/'

chart1.save(path + 'fp3-1.json')
chart2.save(path + 'fp3-1-con1.json')
chart3.save(path + 'fp3-1-con2.json')