In [1]:
import json
import os
import matplotlib.pyplot as plt
import numpy
from sklearn import linear_model

In [2]:
def load_imdb_data(data, file):
    """Append the ratings stored in an IMDB JSON file to ``data``.

    The file is parsed as JSON and the ratings are read from
    ``["movie_ratings"][0]["ratings"]``; each entry's ``"rating"`` value is
    converted to ``float`` and appended, in file order.

    Args:
        data: List that is extended in place with the parsed ratings.
        file: Path of the JSON file to read.

    Returns:
        None. Results are delivered through ``data``.
    """
    # The context manager closes the handle even when parsing fails; the
    # previous version never closed it.
    with open(file, 'r') as imdb_file:
        imdb = json.load(imdb_file)
    data.extend(float(item["rating"]) for item in imdb["movie_ratings"][0]["ratings"])


def load_sentimental_data(sentimental_data, file_name):
    """Append the non-zero sentiment values of a JSON file to ``sentimental_data``.

    The file is parsed as JSON and every entry of its ``"sentiments"`` list is
    inspected: the entry's ``"sentiment"`` value is converted to ``float`` and
    appended unless it equals zero.

    Args:
        sentimental_data: List that is extended in place with the kept values.
        file_name: Path of the JSON file to read.

    Returns:
        None. Results are delivered through ``sentimental_data``.
    """
    # The context manager closes the handle even when parsing fails; the
    # previous version never closed it.
    with open(file_name, 'r') as sentimental_file:
        sentimental = json.load(sentimental_file)
    for item in sentimental["sentiments"]:
        value = float(item["sentiment"])  # parse once, reuse for test and append
        if value != 0:
            sentimental_data.append(value)

In [5]:
# Make sure the output directory for the combined figure exists.
if not os.path.exists('plots'):
    os.makedirs('plots')

# File-name stems used to locate each movie's input files, and the matching
# display titles (same order) used for legends and figure titles.
movies = ['alex of venice', 'beyond the reach', 'child 44', 'dead lands', 'felix and meira', 'monkey kingdom',
          'monsters', 'paul blart', 'true story', 'unfriended']
movie_names = ['Alex of Venice', 'Beyond the Reach', 'Child 44', 'The Dead Lands', 'Felix and Meira', 'Monkey Kingdom',
               'Monsters - Dark Continent', 'Paul Blart - Mall Cop 2', 'True Story', 'Unfriended']

# One independent list per movie. Sizing these from `movies` means adding or
# removing a title only requires editing the two name lists above.
imdb_data = [[] for _ in movies]
sentiment_data = [[] for _ in movies]

for i, movie in enumerate(movies):
    load_imdb_data(imdb_data[i], "imdb_data/" + movie + ".txt")
    load_sentimental_data(sentiment_data[i], "zero_fitted_tweets/zero_fitted_tweet_" + movie + ".txt")

# Plot the rating tendency of all movies together using IMDB data.
# plt.plot returns a list holding one line per series; keep that handle so the
# legend entries stay paired with the right movie.
rating_lines = [plt.plot(ratings)[0] for ratings in imdb_data]

plt.title("The rating tendency of the ten movies from IMDB")
plt.xlabel('Time')
plt.ylabel('IMDB Rating')
plt.legend(rating_lines, movie_names,
           loc=1,
           prop={'size': 5})  # small font so every entry fits in the corner
plt.savefig('plots/ten_movies')

In [6]:

def _bucket_average(values, size):
    """Return the mean of each consecutive run of ``size`` items in ``values``.

    The final run may hold fewer than ``size`` items; it is averaged over the
    items it actually contains.
    """
    averaged = []
    for start in range(0, len(values), size):
        chunk = values[start:start + size]
        averaged.append(sum(chunk) / float(len(chunk)))
    return averaged


def plot_tendency(imdb, tweet, movie_name, interval):
    """Plot one movie's IMDB rating against its tweet sentiment and save the figure.

    Both series are treated as 10-minute samples and averaged into buckets of
    ``interval`` minutes. The IMDB rating is drawn on the left axis, the
    sentiment on the right axis, together with a linear-regression fit of the
    raw (un-bucketed) sentiment series. The figure is written to
    ``"plots/each_movie/" + movie_name``; that directory must already exist.

    Args:
        imdb: Sequence of numeric IMDB ratings.
        tweet: Sequence of numeric sentiment values.
        movie_name: Figure title, also used as the output file name.
        interval: Bucket width in minutes; must be at least 10.

    Returns:
        None.

    Raises:
        ValueError: If ``interval`` is smaller than 10.
    """
    # Floor division keeps this an int on Python 3 as well; `interval/10`
    # there yields a float that range() rejects.
    to_average = int(interval // 10)
    if to_average < 1:
        raise ValueError("interval must be at least 10 (one 10-minute sample)")

    new_imdb = _bucket_average(imdb, to_average)
    new_tweet = _bucket_average(tweet, to_average)
    x_range = len(new_tweet)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Only the first, middle and last ticks carry a label; the rest are blank.
    # Ticks are set on `ax` directly so they land on the axes that is drawn.
    my_xticks = [''] * x_range
    if my_xticks:
        my_xticks[0] = '04/16/2015 - 13:36:00'
        my_xticks[len(my_xticks) // 2] = '04/18/2015 - 17:36:00'
        my_xticks[-1] = '04/20/2015 - 21:46:00'
    ax.set_xticks(range(x_range))
    ax.set_xticklabels(my_xticks)

    ax.plot(new_imdb, '-b', label='imdb_rating')
    ax2 = ax.twinx()
    ax2.plot(new_tweet, '-r', label='sentiment')

    # Fit the tweet data with linear regression (on the raw samples).
    sample_index = numpy.arange(len(tweet))[:, numpy.newaxis]
    regr = linear_model.LinearRegression()
    regr.fit(sample_index, numpy.array(tweet))

    # The x axis is measured in buckets, so rescale the raw sample positions;
    # otherwise only the first 1/to_average of the fit is inside the x-limits.
    ax2.plot(sample_index.ravel() / float(to_average),
             regr.predict(sample_index), color='green', linewidth=2,
             linestyle='--', label='fitted_sentiment')
    ax.legend(loc=2)
    ax2.legend(loc=1)

    ax.set_ylim(-1, 11)
    ax2.set_ylim(-10, 110)
    ax.set_xlim(0, x_range)
    ax.set_xlabel("Time")
    ax.set_ylabel("IMDB Rating")
    ax2.set_ylabel("Sentiment Ratio")

    ax.set_title(movie_name)
    fig.savefig("plots/each_movie/" + movie_name)

In [7]:
# plot_tendency saves its figures under this directory, so create it first.
if not os.path.exists('plots/each_movie'):
    os.makedirs('plots/each_movie')

# Width, in minutes, of the buckets each series is averaged into.
interval = 60
# Iterate over the data itself instead of a hard-coded count of 10, so the
# loop stays correct when the movie lists change length.
for ratings, sentiments, title in zip(imdb_data, sentiment_data, movie_names):
    plot_tendency(ratings, sentiments, title, interval)