In [None]:
import os
# Print the full path of every file found anywhere under the Kaggle input
# directory; directories that contain no files produce no output.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the episode-level dataset and print a quick structural summary.
df = pd.read_csv('../input/game-of-thrones-imdb-dataset/got_imdb.csv')
print('Shape : ', df.shape)
print('Columns : ', df.columns)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()

In [None]:
# Episode label built from the season and episode numbers, e.g. "S1E1".
# Built as one vectorized string expression instead of a per-row chained
# assignment (df.SE[i] = ...), which writes through an intermediate Series
# and is not guaranteed to update df (it never does under Copy-on-Write).
df['SE'] = 'S' + df.Season.astype(str) + 'E' + df.Episode.astype(str)

# Parse the air date strings into datetime64 values.
df['AirDate'] = pd.to_datetime(df.AirDate)

# 1 for the first episode of a season, 0 otherwise.
df['NewSeason'] = (df.Episode == 1).astype(int)
df.head()

In [None]:
# Popularity over time: one bar per episode, bar height = Rating.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Popularity over time', fontsize=18)
sns.barplot(x=df.SE, y=df.Rating, ax=ax)

# Keep an x tick only at each season opener so the axis stays legible.
# NOTE(review): this uses df's row labels as bar positions, which presumes
# the default 0..n-1 index produced by read_csv -- confirm if df is re-indexed.
season_openers = df.index[df.NewSeason == 1]
ax.set_xticks(season_openers)
ax.set_xlabel('Episodes')
plt.show()

In [None]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator 
# Private copy of wordcloud's STOPWORDS collection as a set; it is the value
# handed to the `stopwords=` argument of every WordCloud built in this notebook.
stopwords= set(STOPWORDS)

### Season in a Wordcloud

In [None]:
# One word cloud per season, built from that season's episode descriptions.
for season in df.Season.unique():
    # Drop missing descriptions (a NaN would make str.join raise TypeError).
    season_descs = df.loc[df.Season == season, 'Desc'].dropna().astype(str)
    # Join with a space: with an empty separator the last word of one
    # description is glued to the first word of the next, creating bogus
    # tokens whenever a description does not end in punctuation.
    text = " ".join(season_descs)
    wordcloud = WordCloud(stopwords=stopwords, background_color='black', colormap='coolwarm').generate(text)
    plt.figure(figsize=(8, 4))
    plt.title(f'Season {season}')
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()

In [None]:
# Lookup tables keyed by episode title, used as word-cloud "frequencies":
#   freq   -> the episode's Rating
#   freq_c -> the episode's Counts
# If a title occurs more than once, the last row's value is the one kept.
freq = dict(zip(df.Title, df.Rating))
freq_c = dict(zip(df.Title, df.Counts))

## Wordcloud from Frequencies

### Highly rated Episodes

In [None]:
# Title cloud where each title's size is driven by its entry in `freq`.
wordcloud = WordCloud(
    stopwords=stopwords,
    background_color='black',
    colormap='coolwarm',
).generate_from_frequencies(frequencies=freq)

fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis("off")
plt.show()

### Most reviewed episodes

In [None]:
# Title cloud where each title's size is driven by its entry in `freq_c`.
wordcloud = WordCloud(stopwords=stopwords, background_color='black', colormap='coolwarm')
wordcloud.generate_from_frequencies(frequencies=freq_c)
plt.figure(figsize=(10, 8))
# This figure covers all episodes, so it must not be titled with the `season`
# variable left over from the per-season loop (that labelled it as whichever
# season was iterated last and failed outright on a fresh kernel if that
# loop had not run).
plt.title('Most reviewed episodes')
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

## Color using color_func

In [None]:
import matplotlib
from PIL import Image

In [None]:
# Colour by value: translate every row's Rating into a colour.
# Colormap used for the translation.
cmap = matplotlib.cm.RdYlGn

# Normalizer spanning the observed Rating range (min -> 0, max -> 1).
norm = matplotlib.colors.Normalize(
    vmin=df.Rating.min(),
    vmax=df.Rating.max(),
)

# One colormap output per row of df, in row order.
colors = [cmap(norm(rating)) for rating in df.Rating]

In [None]:
# RGBA floats -> hex colour string
def color_int_to_hex(code):
    """Convert an RGBA colour given as floats into a ``#rrggbbaa`` string.

    Parameters
    ----------
    code : iterable of numbers
        At least four channel values (red, green, blue, alpha), each
        nominally in the range 0..1. Only the first four are used.

    Returns
    -------
    str
        ``'#'`` followed by two lowercase hex digits per channel.

    Raises
    ------
    IndexError
        If ``code`` yields fewer than four values.

    Notes
    -----
    Each channel is scaled by 255 and truncated (not rounded) to an integer.
    Values outside 0..1 are clamped first so that every channel always
    renders as exactly two hex digits; without clamping, a negative or >1
    channel would yield a malformed string such as ``-0x33`` or ``17e``.
    """
    channels = list(code)
    hex_pairs = []
    for position in range(4):
        clamped = min(max(channels[position], 0.0), 1.0)
        # ':02x' zero-pads to two lowercase hex digits in one step.
        hex_pairs.append(f'{int(clamped * 255):02x}')
    return '#' + ''.join(hex_pairs)

In [None]:
# Hex string for every entry of `colors`, in the same order.
color_hex = list(map(color_int_to_hex, colors))

# Episode title -> hex colour (a later duplicate title overrides an earlier one).
color_dict = dict(zip(df.Title, color_hex))

In [None]:
def color_func(word, *args, **kwargs):
    """Return the colour registered for ``word`` in ``color_dict``.

    Extra positional and keyword arguments are accepted and ignored.
    Words missing from ``color_dict`` get ``'#000000'`` (black).
    """
    return color_dict.get(word, '#000000')

# Wordcloud Size: Number of Ratings, Color: Average Rating

In [None]:
# Title cloud sized by `freq_c`, with each title's colour chosen by color_func.
wordcloud = WordCloud(
    stopwords=stopwords,
    background_color='black',
    color_func=color_func,
).generate_from_frequencies(frequencies=freq_c)

fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis("off")
plt.show()