# Movies DataSet Graphs #

In [44]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
import statsmodels.formula.api as sm
import math
from pandas.stats.api import ols
from sklearn.preprocessing import scale
from sklearn.metrics import mean_squared_error
import sklearn.model_selection as m_sel
import plotly.offline as plot
import plotly.graph_objs as go
from sklearn import preprocessing
from sklearn.utils import shuffle
import statsmodels.stats.diagnostic as sms
# Enable inline plotly rendering for this notebook; `plot` is the
# `plotly.offline` package imported above, which re-exports this function.
plot.init_notebook_mode(connected=True)

In [45]:
# Load the three ml-100k tables. None of the files carries a header row,
# so the column names are supplied explicitly.

# u.item: pipe-separated; five descriptive columns followed by genre columns.
item_columns = [
    'item id', 'movie title', 'release date', 'video release date', 'IMDb URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci Fi', 'Thriller', 'War', 'Western',
]
user_item = pd.read_csv(
    '/home/user/Downloads/ml-100k/u.item',
    sep='|',
    header=None,
    names=item_columns,
    encoding="ISO-8859-1"
)

# u.user: pipe-separated, one row per user.
user_columns = ['user id', 'age', 'gender', 'occupation', 'zip code']
user_detail = pd.read_csv(
    '/home/user/Downloads/ml-100k/u.user',
    sep='|',
    header=None,
    names=user_columns
)

# u.data: tab-separated, one row per (user, item) rating.
rating_columns = ['user id', 'item id', 'rating', 'timestamp']
user_data = pd.read_csv(
    '/home/user/Downloads/ml-100k/u.data',
    sep='\t',
    header=None,
    names=rating_columns
)


## Age Group Histogram ##

In [67]:
# Histogram of user ages, using 8-year-wide bins over the range 0-100.
data = [
    go.Histogram(
        x=user_detail.age,
        xbins=dict(start=0, end=100, size=8),
    )
]
layout = go.Layout(
    title='Histogram for Age Groups.',
    xaxis=dict(title='Age'),
    # Fixed the typo in the axis label (was 'Numbaer of users').
    yaxis=dict(title='Number of users'),
)
fig = go.Figure(data=data, layout=layout)
plot.iplot(fig, filename='age group histogram')

In [48]:
# Join every rating to the profile of the user who gave it, then take the
# mean rating per occupation (a Series indexed by occupation).
occ_rat = (
    user_data.merge(user_detail, on='user id')
    .groupby('occupation')['rating']
    .mean()
)

## Occupation Vs Average Rating ##

In [50]:
# Bar chart of the mean rating for each occupation in `occ_rat`.
data = [go.Bar(x=occ_rat.index, y=occ_rat.values)]
layout = go.Layout(
    title='Occupation vs Average Rating',
    xaxis=dict(title='Occupation'),
    yaxis=dict(
        range=[0, 4],
        title='Average ratings',
    ),
)
fig = go.Figure(data=data, layout=layout)
# The filename was copy-pasted from the age histogram cell; give this figure
# its own name so it is not mislabelled.
plot.iplot(fig, filename='occupation vs average rating')

## Rating Histogram ##

In [52]:
# Histogram of the rating values in `user_data`. Every row of `user_data`
# is one rating, so the bar heights count ratings rather than users.
data = [go.Histogram(x=user_data.rating)]
layout = go.Layout(
    # Removed the stray trailing space from the title.
    title='Histogram for Rating',
    xaxis=dict(title='Rating'),
    # Was 'Number of users'; the y-axis is a count of ratings.
    yaxis=dict(title='Number of ratings'),
)
fig = go.Figure(data=data, layout=layout)
plot.iplot(fig, filename='Rating histogram')

## Average Rating over the year ## 

In [54]:
# Interpret the raw timestamps as seconds since the epoch and keep only the
# calendar date. NOTE: this overwrites `user_data['timestamp']` in place, so
# later cells see date values instead of the raw integers.
user_data['timestamp'] = pd.to_datetime(user_data['timestamp'], unit='s').dt.date

# Mean rating per calendar date (one point per day that has ratings).
year_rating = user_data.groupby('timestamp')['rating'].mean()

In [55]:
# Line/scatter plot of the per-date mean rating held in `year_rating`.
rating_trace = go.Scatter(x=year_rating.index, y=year_rating.values)
data = [rating_trace]
layout = go.Layout(
    title='Year vs Average Rating',
    xaxis={'title': 'Year'},
    yaxis={'title': 'Average ratings'},
)
fig = go.Figure(data=data, layout=layout)
plot.iplot(fig, filename='Average Rating over year')

## Average Rating by Age and Gender ##

In [128]:
# Mean rating per (age, gender), then averaged within 10-year age bins.
age_gender = pd.merge(user_data, user_detail, on='user id')
age_gender = pd.DataFrame(age_gender.groupby(['age', 'gender']).rating.mean())

# Pivot gender into columns: one row per age, columns ('rating', 'F') and
# ('rating', 'M'). An (age, gender) combination with no ratings stays NaN.
# The original filled these gaps with 0, which treated "no data" as an
# average rating of 0 and pulled the bin means below the real values.
# Leaving them NaN lets the mean below skip them.
temp = age_gender.unstack()

# Bucket the ages into (0, 10], (10, 20], ..., (70, 80].
temp['age'] = temp.index
temp.age = pd.cut(temp.age, bins=range(0, 90, 10))

# Average the per-age means inside each bin; a bin with no data for a
# gender yields NaN for that gender rather than a misleading 0.
z = temp.groupby('age').mean()

In [129]:
# Grouped bar chart: mean rating per age bin, one bar per gender.

# Convert the bin labels to plain strings before handing them to plotly.
# Depending on the pandas version, `pd.cut` labels may be Interval objects,
# which plotly may not be able to serialise; strings are always safe.
age_labels = z.index.astype(str)

data1 = go.Bar(
    x=age_labels,
    y=z.rating.F,
    name='Female',
    marker=dict(color='rgb(49,130,189)'),
)
data2 = go.Bar(
    x=age_labels,
    y=z.rating.M,
    name='Male',
    marker=dict(color='rgb(204,204,204)'),
)
data = [data1, data2]
layout = go.Layout(
    title='Age and Gender vs Average Rating',
    xaxis=dict(title='Age and Gender'),
    yaxis=dict(
        range=[0, 5],
        title='Average ratings',
    ),
    barmode='group',
)
fig = go.Figure(data=data, layout=layout)
# The filename was copy-pasted from the age histogram cell; name this
# figure after what it actually shows.
plot.iplot(fig, filename='age and gender vs average rating')