# Importing Libraries

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd 
import scipy as sp
import sklearn as sk # data mining tools
import matplotlib.pylab as plt # plotting
import seaborn as sns # advanced plotting
import ast
import collections
import re 
import six
from sklearn.preprocessing import MinMaxScaler
# Show up to 100 characters per cell and turn off pandas' chained-assignment warning.
pd.set_option('display.max_colwidth', 100)
pd.set_option('mode.chained_assignment', None)

In [None]:
# Load the main TED talks table; every later cell adds its features to `df`.
df = pd.read_csv(filepath_or_buffer='ted_main.csv')

# Filling Missing Values

In [None]:
# Replace missing occupations with the most frequent occupation.
# The result is assigned back to the column: calling fillna(inplace=True) on the
# column selection acts on an intermediate object and leaves `df` unchanged when
# pandas Copy-on-Write is active.
most_common_occupation = df['speaker_occupation'].mode()[0]
df['speaker_occupation'] = df['speaker_occupation'].fillna(most_common_occupation)

# Creating Funny Factor and Popularity

In [None]:
def _min_max_scale(series):
    """Linearly rescale a numeric Series so its minimum maps to 0 and its maximum to 1."""
    low, high = series.min(), series.max()
    return (series - low) / (high - low)


def _parse_literal(value):
    """Parse a stringified Python literal; return already-parsed values unchanged.

    The pass-through keeps this cell re-runnable: on a second execution the
    columns already hold parsed objects, which ast.literal_eval would reject.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


# 'ratings' and 'tags' are stored in the CSV as stringified Python literals.
df['ratings'] = df['ratings'].apply(_parse_literal)
df['tags'] = df['tags'].apply(_parse_literal)

# Total number of ratings per talk: add up the 'count' of every rating entry.
# (The previous row-by-row loop assumed exactly 14 entries per talk.)
df['ratings_count'] = df['ratings'].apply(
    lambda entries: sum(entry.get('count') for entry in entries)
)

# popularity = mean of the min-max scaled views, comments and rating counts.
# The scaled series are temporaries and are never stored as columns of `df`.
comm_sc = _min_max_scale(df['comments'])
views_sc = _min_max_scale(df['views'])
ratings_nbr_sc = _min_max_scale(df['ratings_count'])
df['popularity'] = (views_sc + comm_sc + ratings_nbr_sc) / 3

In [None]:
# Load the transcripts and collapse exact duplicate rows to a single copy.
# (keep=False would discard *every* copy of a duplicated transcript, so the
# affected talks would lose their transcript and end up with a laughter count of 0.)
df_trans = pd.read_csv('transcripts.csv')
df_trans = df_trans.drop_duplicates().reset_index(drop=True)

# Count the occurrences of the whole word 'Laughter' in each transcript.
word = 'Laughter'
df_trans['laughter'] = df_trans['transcript'].str.count(r'\b%s\b' % re.escape(word))

# Min-max scale the laughter counts to [0, 1].
laughter_min = df_trans['laughter'].min()
laughter_max = df_trans['laughter'].max()
df_trans['Scaled_laugh'] = (df_trans['laughter'] - laughter_min) / (laughter_max - laughter_min)

# Left-join the transcripts onto the talks by 'url': every talk is kept, in its
# original order, and talks without a transcript get NaN (turned into 0 below).
# An outer join would additionally append transcript-only rows whose talk columns
# are all NaN, and pandas documents that it sorts the join keys.
df = pd.merge(df, df_trans, on='url', how='left')
df['funny_factor'] = df['Scaled_laugh'].fillna(0)
df.drop(['Scaled_laugh', 'laughter'], axis=1, inplace=True)

In [None]:
# Publication year, taken from the Unix timestamp (in seconds) stored in 'published_date'.
df['year'] = [pd.to_datetime(stamp, unit='s').year for stamp in df['published_date']]

In [None]:
def chunkIt(seq, num):
    """Split ``seq`` into ``num`` consecutive slices of (almost) equal length.

    Args:
        seq: Any sliceable sequence with a length (list, tuple, string, ...).
        num: Number of chunks to produce; must be at least 1.

    Returns:
        A list of ``num`` slices of ``seq`` which, concatenated in order,
        reproduce ``seq``. Some chunks are empty when ``num > len(seq)``.
        An empty ``seq`` yields an empty list.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    parts = int(num)
    if parts < 1:
        raise ValueError("num must be at least 1, got %r" % (num,))

    size = len(seq)
    if size == 0:
        return []

    # Chunk k covers [k*size//parts, (k+1)*size//parts). Integer arithmetic keeps
    # the boundaries exact; repeatedly adding a float step can stop just short of
    # len(seq) and produce an extra, (num+1)-th chunk (e.g. one item, ten chunks).
    bounds = [k * size // parts for k in range(parts + 1)]
    return [seq[start:stop] for start, stop in zip(bounds, bounds[1:])]

# Classifying Popularity into Three Classes

In [None]:
# Every talk starts in class 0; the cells below assign the actual class.
df.loc[:, 'popularity_class'] = 0

In [None]:
# Sort the popularity scores, cut them into three equal-sized chunks and keep
# the boundary values of the chunks.
_popularity_thirds = chunkIt(df.popularity.sort_values().tolist(), 3)
_1 = _popularity_thirds[0][-1]   # largest score of the lowest third
_2 = _popularity_thirds[1][0]    # smallest score of the middle third
_3 = _popularity_thirds[1][-1]   # largest score of the middle third
_4 = _popularity_thirds[2][0]    # smallest score of the highest third

In [None]:
# Label each talk by the popularity range it falls into (bounds are inclusive):
# 0 = lowest third, 1 = middle third, 2 = highest third.
_class_ranges = [(0, _1), (_2, _3), (_4, 1)]
for _label, (_lower, _upper) in enumerate(_class_ranges):
    df.loc[df.popularity.between(_lower, _upper), 'popularity_class'] = _label

# Creating Occupations (Dummy)

In [None]:
# Start every occupation indicator column at 0.
for _category in ['Literature', 'Art', 'Economy_Politics', 'Medicine',
                  'Academy', 'Engineering_Science', 'Other_Occupations']:
    df[_category] = 0

In [None]:
# Occupation titles that are matched exactly, listed per category. A title may
# appear under several categories; such talks are flagged in each of them.
_exact_occupations = {
    'Literature': [
        "Writer", "Journalist", "Author", "Philosopher", "Historian", "Poet",
        "Novelist", "Reporter", "Writer, activist",
        "Performance poet, multimedia artist", "Science writer",
    ],
    'Art': [
        "Designer", "Chef", "Violinist", "Producer", "Cartoonist",
        "Performance poet, multimedia artist", "Photojournalist",
        "Singer-songwriter", "Artist", "Architect", "Photographer",
        "Filmmaker", "Musician", "Singer/songwriter", "Graphic designer",
        "Techno-illusionist", "Comedian", "Musician, activist", "Sculptor",
    ],
    'Economy_Politics': [
        "Entrepreneur", "Environmentalist, futurist",
        "Investor and advocate for moral leadership", "Musician, activist",
        "Economist", "Activist", "Philanthropist", "Behavioral economist",
        "Writer, activist", "Climate advocate", "Legal activist", "Futurist",
        "Social entrepreneur",
    ],
    'Medicine': [
        "Psychologist", "Neuroscientist",
        "Global health expert; data visionary", "Social psychologist",
        "Surgeon", "Physician",
    ],
    'Academy': [
        "Educator",
    ],
    'Engineering_Science': [
        "Roboticist", "Biologist", "Physicist", "Marine biologist",
        "Technologist", "Global health expert; data visionary", "Astronomer",
        "Oceanographer", "Engineer", "Computer scientist", "Inventor",
        "Futurist", "Mathematician", "Astrophysicist",
        "Evolutionary biologist", "Sound consultant", "Game designer",
        "Chemist", "Social Media Theorist", "Data scientist", "Tech visionary",
        "Paleontologist", "Researcher",
    ],
}

# Flag every talk whose occupation is one of the category's exact titles.
for _category, _titles in _exact_occupations.items():
    df.loc[df.speaker_occupation.isin(_titles), _category] = 1

In [None]:
# Talks that have not been flagged in any of the six named occupation categories.
_named_categories = ['Literature', 'Art', 'Economy_Politics',
                     'Medicine', 'Academy', 'Engineering_Science']
occ_df = df[df[_named_categories].ne(1).all(axis=1)]

In [None]:
# Keyword -> occupation category. The keywords are matched as case-insensitive
# substrings of the occupation text by the loop that consumes this mapping.
dictionary = {
    "Author": "Literature",
    "Actor": "Art",
    "researcher": "Engineering_Science",
    "Historian": "Literature",
    "Philosopher": "Literature",
    "Activist": "Economy_Politics",
    "Robotics": "Engineering_Science",
    "engineer": "Engineering_Science",
    "composer": "Art",
    "Pianist": "Art",
    "Marketing": "Economy_Politics",
    "Public": "Economy_Politics",
    "fund": "Economy_Politics",
    "psycho": "Medicine",
    "logist": "Engineering_Science",
    "computer": "Engineering_Science",
    "writer": "Literature",
    "expert": "Engineering_Science",
    "Industrial": "Engineering_Science",
    "artist": "Art",
    "scientist": "Engineering_Science",
    "founder": "Economy_Politics",
    "specialist": "Engineering_Science",
    "music": "Art",
    "design": "Art",
    "physicist": "Engineering_Science",
    "educat": "Academy",
    "Mayor": "Economy_Politics",
    "President": "Economy_Politics",
    "art": "Art",
    "bio": "Engineering_Science",
    "tech": "Engineering_Science",
    "professor": "Academy",
    "math": "Engineering_Science",
    "cyber": "Engineering_Science",
    "capital": "Economy_Politics",
    "digit": "Engineering_Science",
    "entrepreneur": "Economy_Politics",
    "religi": "Economy_Politics",
    "genetic": "Engineering_Science",
    "futur": "Economy_Politics",
    "explorer": "Economy_Politics",
    "journ": "Literature",
    "law": "Economy_Politics",
    "Global": "Economy_Politics",
    "advocate": "Economy_Politics",
    "company": "Economy_Politics",
    "story": "Literature",
    "novel": "Literature",
    "band": "Art",
    "photo": "Art",
    "arch": "Art",
    "chem": "Engineering_Science",
    "care": "Medicine",
    "visual": "Art",
    "innov": "Engineering_Science",
    "analy": "Engineering_Science",
}

In [None]:
# Match every keyword, case-insensitively and as a plain substring, against the
# occupations of the still-uncategorised talks; a hit sets the keyword's category to 1.
# NOTE(review): plain substring matching also hits inside longer words, e.g.
# "arch" (Art) is contained in "researcher" - confirm this is intended.
_occupations_lower = occ_df['speaker_occupation'].str.lower()
for _keyword, _category in dictionary.items():
    _hits = _occupations_lower.str.contains(_keyword.lower(), regex=False, na=False)
    df.loc[_hits[_hits].index, _category] = 1

In [None]:
# Flag the talks that still belong to no named category as 'Other_Occupations'.
# A single .loc assignment is used on purpose: the chained form
# df['Other_Occupations'][mask] = 1 writes to an intermediate object and leaves
# `df` unchanged when pandas Copy-on-Write is active.
_uncategorised = ((df.Literature != 1) & (df.Art != 1) & (df.Economy_Politics != 1)
                  & (df.Medicine != 1) & (df.Academy != 1) & (df.Engineering_Science != 1))
df.loc[_uncategorised, 'Other_Occupations'] = 1

# Creating Speaker and Tags Trends

In [None]:
# Load the interest-over-time tables for speakers and for tags.
speaker_trends_df, tag_trends_df = (
    pd.read_csv(_csv_path)
    for _csv_path in ("speakers_interest_over_time.csv", "tags_interest_over_time.csv")
)

In [None]:
def get_value(name):
    """Return a speaker's mean yearly trend value over 2006-2017.

    Selects the rows of ``speaker_trends_df`` whose ``main_speaker`` equals
    ``name`` and averages the first matching row over the twelve year columns.

    Raises:
        IndexError: If no row of ``speaker_trends_df`` matches ``name``.
    """
    year_columns = [
        '2006', '2007', '2008', '2009', '2010', '2011',
        '2012', '2013', '2014', '2015', '2016', '2017',
    ]
    matches = speaker_trends_df[speaker_trends_df.main_speaker == name]
    yearly_total = sum(matches[column].values[0] for column in year_columns)
    return float(yearly_total) / len(year_columns)

In [None]:
# Mean 2006-2017 trend value of each talk's main speaker.
df['speaker_trend'] = df['main_speaker'].apply(get_value)

In [None]:
def get_value_list(tagsList):
    """Return the average trend value of a list of tags.

    For each tag, the first row of ``tag_trends_df`` whose ``tags`` equals the
    tag is averaged over the year columns 2006-2017; the result is the mean of
    those per-tag averages.

    Raises:
        IndexError: If a tag has no matching row in ``tag_trends_df``.
        ZeroDivisionError: If ``tagsList`` is empty.
    """
    year_columns = [
        '2006', '2007', '2008', '2009', '2010', '2011',
        '2012', '2013', '2014', '2015', '2016', '2017',
    ]

    def _tag_mean(tag):
        # Mean of one tag's yearly values.
        matches = tag_trends_df[tag_trends_df.tags == tag]
        yearly_total = sum(matches[column].values[0] for column in year_columns)
        return float(yearly_total) / len(year_columns)

    return float(sum(_tag_mean(tag) for tag in tagsList)) / len(tagsList)

In [None]:
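# 'tags' is already parsed into Python objects in the popularity cell above, so this stays disabled:
#df['tags'] = df['tags'].apply(lambda x: ast.literal_eval(x))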

In [None]:
# Average 2006-2017 trend value over each talk's tags.
df['tags_trend'] = df['tags'].apply(get_value_list)

# Creating Weekdays (Dummy)

In [None]:
# Derive the publication date (MM/DD/YYYY) and the weekday name from the Unix
# timestamp in 'published_date'.
# The timestamp is interpreted with pd.to_datetime(unit='s'), exactly like the
# 'year' column, so the result does not depend on the machine running the
# notebook: datetime.fromtimestamp() converts to local time and
# calendar.day_name follows the current locale.
from datetime import datetime
import calendar
_published_at = pd.to_datetime(df['published_date'], unit='s')
df['Published_day'] = _published_at.dt.strftime('%m/%d/%Y')
df['Pweek_day'] = _published_at.dt.day_name()

In [None]:
# One indicator column per weekday, all starting at 0.
for _weekday in ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']:
    df[_weekday] = 0

In [None]:
# Set 1 only in the column that matches the talk's publication weekday.
for _weekday in ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']:
    df.loc[df.Pweek_day == _weekday, _weekday] = 1

In [None]:
# The weekday name is now encoded in the indicator columns; remove the helper column.
df.drop(columns=['Pweek_day'], inplace=True)

# Creating Events (Dummy)

In [None]:
# Distinct event names and the number of talks for each of them.
values, counts = np.unique(df['event'].to_numpy(), return_counts=True)

In [None]:
# Start every event-type indicator column at 0.
for _event_type in ['TEDx', 'TED', 'TED_Global', 'TED_Other',
                    'Non_TED_University', 'Non_TED_Other']:
    df[_event_type] = 0

In [None]:
# Event names of the form "TED<year>" that are flagged in the 'TED' column.
_ted_year_events = [
    "TED1984", "TED1990", "TED1994", "TED1998",
    "TED2001", "TED2002", "TED2003", "TED2004", "TED2005", "TED2006",
    "TED2007", "TED2008", "TED2009", "TED2010", "TED2011", "TED2012",
    "TED2013", "TED2014", "TED2015", "TED2016", "TED2017",
]
df.loc[df.event.isin(_ted_year_events), 'TED'] = 1

In [None]:
# Talks not flagged as 'TED' so far, with their distinct event names and counts.
oth_df = df[df.TED.ne(1)]
values_, counts_ = np.unique(oth_df['event'].to_numpy(), return_counts=True)

In [None]:
# Event-name keyword -> event-type column. The keywords are matched as
# case-insensitive substrings of the event name by the loop that consumes this mapping.
dico = {
    "TEDx": "TEDx",
    "TEDGlobal": "TED_Global",
    "TED D": "TED_Other", "TED F": "TED_Other", "TED P": "TED_Other",
    "TED R": "TED_Other", "TED S": "TED_Other", "TED T": "TED_Other",
    "TED i": "TED_Other",
    "TEDD": "TED_Other", "TEDE": "TED_Other", "TEDF": "TED_Other",
    "TEDG": "TED_Other", "TEDH": "TED_Other", "TEDI": "TED_Other",
    "TEDJ": "TED_Other", "TEDK": "TED_Other", "TEDL": "TED_Other",
    "TEDM": "TED_Other", "TEDN": "TED_Other", "TEDO": "TED_Other",
    "TEDP": "TED_Other", "TEDQ": "TED_Other", "TEDR": "TED_Other",
    "TEDS": "TED_Other", "TEDT": "TED_Other", "TEDU": "TED_Other",
    "TEDV": "TED_Other", "TEDW": "TED_Other", "TEDY": "TED_Other",
    "TEDZ": "TED_Other", "TED-E": "TED_Other",
    " University": "Non_TED_University",
    "TED@": "TED",
}

In [None]:
# Match every keyword, case-insensitively and as a plain substring, against the
# event names of the talks in oth_df; a hit sets the keyword's column to 1.
_events_lower = oth_df['event'].str.lower()
for _keyword, _column in dico.items():
    _hits = _events_lower.str.contains(_keyword.lower(), regex=False, na=False)
    df.loc[_hits[_hits].index, _column] = 1

In [None]:
# Flag the talks that match no other event type as 'Non_TED_Other'.
# A single .loc assignment is used on purpose: the chained form
# df['Non_TED_Other'][mask] = 1 writes to an intermediate object and leaves
# `df` unchanged when pandas Copy-on-Write is active.
_no_event_type = ((df.TEDx != 1) & (df.TED != 1) & (df.TED_Global != 1)
                  & (df.TED_Other != 1) & (df.Non_TED_University != 1))
df.loc[_no_event_type, 'Non_TED_Other'] = 1

In [None]:
# A "TEDGlobal ..." event name also contains the "TEDG" keyword, so such talks
# are flagged as both TED_Global and TED_Other; keep only TED_Global for them.
# Each comparison is parenthesised: `a == 1 & b == 1` parses as the chained
# comparison `a == (1 & b) == 1`, which is only correct by accident for 0/1
# values. The update goes through one .loc assignment instead of the chained
# df['TED_Other'][i] = 0, which does not modify `df` under pandas Copy-on-Write.
_flagged_twice = (df['TED_Global'] == 1) & (df['TED_Other'] == 1)
df.loc[_flagged_twice, 'TED_Other'] = 0

# Creating Question and How columns

In [None]:
# question = 1 when the talk title contains a question mark, otherwise 0.
df['question'] = [1 if "?" in _title else 0 for _title in df['title']]

In [None]:
# how = 1 when the talk title contains "how" or "How", otherwise 0.
# Both spellings are tested in one pass: running a second loop for "How" with
# its own else-branch reset the rows that only contained lowercase "how" to 0.
df['how'] = [1 if ("how" in _title or "How" in _title) else 0 for _title in df['title']]

# Decision Tree

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, classification_report, precision_score
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [None]:
# Feature columns fed to the classifier, grouped by the step that created them.
attributes = [
    # talk metadata
    'duration', 'num_speaker',
    # speaker occupation
    'Literature', 'Art', 'Economy_Politics', 'Medicine',
    'Academy', 'Engineering_Science', 'Other_Occupations',
    # trend scores
    'speaker_trend', 'tags_trend',
    # publication weekday
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
    # laughter in the transcript
    'funny_factor',
    # event type
    'TEDx', 'TED', 'TED_Global', 'TED_Other', 'Non_TED_University', 'Non_TED_Other',
    # title wording
    'how', 'question',
]

In [None]:
# Feature matrix and target, split 50/50 into train and test sets,
# stratified by the popularity class and with a fixed seed.
y = df['popularity_class']
X = df[attributes].values

_split = train_test_split(X, y, test_size=0.5, random_state=100, stratify=y)
X_train, X_test, y_train, y_test = _split

In [None]:
# Fit an entropy-based decision tree of depth <= 5 on the training split.
# random_state is fixed so repeated runs build the same tree: scikit-learn
# permutes the features at every split, so ties between equally good splits are
# otherwise broken differently from run to run.
clf = DecisionTreeClassifier(criterion='entropy', max_depth=5,
                             min_samples_split=60, min_samples_leaf=5,
                             random_state=100)
clf.fit(X_train, y_train)

In [None]:
# Print every feature name next to its importance in the fitted tree.
for _feature, _importance in zip(attributes, clf.feature_importances_):
    print(_feature, _importance)

In [None]:
import pydotplus
from sklearn import tree
from IPython.display import Image

In [None]:
import os

# Directory containing the Graphviz executables. The default is the original
# author's local install; set the GRAPHVIZ_BIN_DIR environment variable to
# point at a different location on other machines.
GRAPHVIZ_BIN_DIR = os.environ.get(
    'GRAPHVIZ_BIN_DIR',
    'C:/Users/Pouria/Anaconda3/pkgs/graphviz-2.38-hfd603c8_2/Library/bin/graphviz')

# Append the directory to PATH only when it exists and is not listed yet, so
# re-running this cell does not keep growing PATH and machines without that
# directory are left untouched.
_path_entries = os.environ.get('PATH', '').split(os.pathsep)
if os.path.isdir(GRAPHVIZ_BIN_DIR) and GRAPHVIZ_BIN_DIR not in _path_entries:
    os.environ['PATH'] = os.pathsep.join(_path_entries + [GRAPHVIZ_BIN_DIR])

In [None]:
# Export the fitted tree (drawn down to depth 3) as DOT source, render it to PNG
# and display the image as the cell output.
_export_options = dict(out_file=None,
                       feature_names=attributes,
                       filled=True, rounded=True,
                       special_characters=True, max_depth=3)
dot_data = tree.export_graphviz(clf, **_export_options)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())

In [None]:
# Predictions of the fitted tree on the data it was trained on.
y_train_pred = clf.predict(X=X_train)

In [None]:
# Micro-averaged precision, accuracy and micro-averaged F1 on the training split.
_train_scores = [
    ('Precision %s', precision_score(y_train, y_train_pred, average='micro')),
    ('Accuracy %s', accuracy_score(y_train, y_train_pred)),
    ('F1-score %s', f1_score(y_train, y_train_pred, average='micro')),
]
for _template, _score in _train_scores:
    print(_template % _score)

In [None]:
# Predictions of the fitted tree on the held-out test split.
y_test_pred = clf.predict(X=X_test)

In [None]:
# Micro-averaged precision on the test split.
_test_precision = precision_score(y_test, y_test_pred, average='micro')
print('Precision %s' % _test_precision)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [None]:
def report(results, n_top=3):
    """Print the best-ranked candidates of a hyper-parameter search.

    Args:
        results: Mapping with the entries 'rank_test_score', 'mean_test_score',
            'std_test_score' and 'params', indexed by candidate position.
        n_top: Highest rank to report. Ranks 1..n_top are printed in order and
            every candidate sharing a rank is listed.
    """
    ranks = results['rank_test_score']
    for rank in range(1, n_top + 1):
        for position in np.flatnonzero(ranks == rank):
            summary = [
                "Model with rank: {0}".format(rank),
                "Mean validation score: {0:.9f} (std: {1:.9f})".format(
                    results['mean_test_score'][position],
                    results['std_test_score'][position]),
                "Parameters: {0}".format(results['params'][position]),
                "",  # blank separator line after each candidate
            ]
            print("\n".join(summary))

In [None]:
# Hyper-parameter grid for the decision tree search.
param_list = dict(
    criterion=['gini', 'entropy'],
    max_depth=[None] + [depth for depth in np.arange(2, 6)],   # unlimited, then 2..5
    min_samples_split=[2, 5, 10, 20, 30, 40, 50, 60, 70],
    min_samples_leaf=[1, 5, 10, 20, 30, 40, 50, 60, 70],
)

In [None]:
# Exhaustive search over param_list on the training split; `clf` is then
# replaced by the best estimator found.
grid_search = GridSearchCV(estimator=clf, param_grid=param_list).fit(X_train, y_train)
clf = grid_search.best_estimator_

In [None]:
# Print the candidates ranked 1 to 5 by the grid search.
report(results=grid_search.cv_results_, n_top=5)

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# 10-fold cross-validated micro precision of `clf`, computed on the test split;
# printed as the mean together with twice the standard deviation.
precision_cross_scores = cross_val_score(estimator=clf, X=X_test, y=y_test,
                                         scoring='precision_micro', cv=10)
_cv_mean = precision_cross_scores.mean()
_cv_spread = precision_cross_scores.std() * 2
print('Precision: %0.4f (+/- %0.2f)' % (_cv_mean, _cv_spread))

# Creating Predicted Column

In [None]:
# -1 marks talks without a prediction; only test-split rows are filled in below.
df.loc[:, 'predicted_class'] = -1

In [None]:
# y_test keeps the row labels of `df`, and y_test_pred is in the same order as
# y_test, so each prediction is written to the row of the talk it belongs to.
df.loc[y_test.index, 'predicted_class'] = y_test_pred

In [None]:
# Show 15 randomly chosen talks with their actual and predicted class.
df.loc[:, ['popularity_class', 'predicted_class']].sample(n=15)

# Predicting one sample talk

In [None]:
# Pick one talk by position and separate its feature values from its actual class.
sample = 1468
df_sample_class_2 = df.iloc[sample]
X_class_2, y_class_2 = (df_sample_class_2[attributes].values,
                        df_sample_class_2['popularity_class'])

In [None]:
# Turn the 1-D feature vector into a single-row 2-D array, as predict() expects.
X_class_2 = X_class_2.reshape(1, -1)

In [None]:
# Predicted class of the selected talk (an array holding one prediction).
y_sample_pred = clf.predict(X=X_class_2)

In [None]:
# Display the predicted class of the selected talk.
y_sample_pred[0]

In [None]:
# Display the actual popularity class of the selected talk.
df.iloc[sample]['popularity_class']

In [None]:
# Report the actual and the predicted class of the selected talk side by side.
_actual_class = df.iloc[sample]['popularity_class']
_predicted_class = y_sample_pred[0]
print("Actual class for talk %d is %d, and predicted class is %d" % (sample, _actual_class, _predicted_class))