In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
movies_df = pd.read_csv('../../Dataset/ml-latest-small/movies.csv')
ratings_df = pd.read_csv('../../Dataset/ml-latest-small/ratings.csv')
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
#checking for how many movies there are
print(ratings_df['movieId'].nunique())
print(movies_df['movieId'].nunique())

9724
9742


In [5]:
#making new df for calculating average rating without unnecessary features
df_for_avg_rat = ratings_df.drop(columns=['userId', 'timestamp'])
df_for_avg_rat.head()

Unnamed: 0,movieId,rating
0,1,4.0
1,3,4.0
2,6,4.0
3,47,5.0
4,50,5.0


In [6]:
#sorted by movieId
df_for_avg_rat.sort_values(by='movieId', inplace=True)
df_for_avg_rat

Unnamed: 0,movieId,rating
0,1,4.0
81531,1,4.0
30517,1,3.5
81082,1,4.0
30601,1,3.0
...,...,...
27256,193581,4.0
27257,193583,3.5
27258,193585,3.5
27259,193587,3.5


In [7]:
#creating average and total rating dictionaries
avr_rat = {}
tot_rat = {}
temp = 0
counter = 0
for i in range(len(df_for_avg_rat)):
    if df_for_avg_rat['movieId'].iloc[i] == df_for_avg_rat['movieId'].iloc[-1] or df_for_avg_rat['movieId'].iloc[i] == df_for_avg_rat['movieId'].iloc[i+1]:
        temp += df_for_avg_rat['rating'].iloc[i]
        counter+=1
    elif df_for_avg_rat['movieId'].iloc[i] != df_for_avg_rat['movieId'].iloc[i+1]:
        temp += df_for_avg_rat['rating'].iloc[i]
        avr_rat[df_for_avg_rat['movieId'].iloc[i]] = temp/counter
        tot_rat[df_for_avg_rat['movieId'].iloc[i]] = counter
        temp = 0
        counter = 0
    else:
        break

  avr_rat[df_for_avg_rat['movieId'].iloc[i]] = temp/counter


In [9]:
#tot_rat dictionary converted to DF
tot_rat_df = pd.DataFrame.from_dict(tot_rat, orient='index')
tot_rat_df.columns = ['totalRating']
tot_rat_df['movieId'] = tot_rat_df.index
tot_rat_df = tot_rat_df.reindex(columns=['movieId', 'totalRating'])
tot_rat_df

Unnamed: 0,movieId,totalRating
1,1,214
2,2,109
3,3,51
4,4,6
5,5,48
...,...,...
193579,193579,0
193581,193581,0
193583,193583,0
193585,193585,0


In [10]:
#avr_rat dictionary converted to DF
avr_rat_df = pd.DataFrame.from_dict(avr_rat, orient='index')
avr_rat_df.columns = ['averageRating']
avr_rat_df['movieId'] = avr_rat_df.index
avr_rat_df = avr_rat_df.reindex(columns=['movieId', 'averageRating'])
avr_rat_df

Unnamed: 0,movieId,averageRating
1,1,3.939252
2,2,3.463303
3,3,3.323529
4,4,2.750000
5,5,3.135417
...,...,...
193579,193579,inf
193581,193581,inf
193583,193583,inf
193585,193585,inf


In [11]:
#deleting the movies that have less than 20 ratings
filtered_trd = tot_rat_df[tot_rat_df['totalRating'] >= 20]
filtered_trd.sort_values(by=['totalRating'])

Unnamed: 0,movieId,totalRating
830,830,20
2013,2013,20
2067,2067,20
262,262,20
383,383,20
...,...,...
2571,2571,277
593,593,278
296,296,306
318,318,316


In [12]:
#merged both DF's
ratingsWoUnusedDF = pd.merge(avr_rat_df, filtered_trd, on='movieId')
ratingsWoUnusedDF.sort_values(by=['totalRating'])

Unnamed: 0,movieId,averageRating,totalRating
217,830,3.225000,20
486,2013,3.250000,20
498,2067,4.350000,20
83,262,4.100000,20
129,383,3.250000,20
...,...,...,...
592,2571,4.207581,277
178,593,4.176259,278
94,296,4.210784,306
101,318,4.443038,316


Genre Part

In [13]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [14]:
genres = movies_df['genres'].str.get_dummies('|')
genres

Unnamed: 0,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
9738,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
9739,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
9740,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [15]:
movies_df = pd.concat([movies_df,genres], axis=1)
movies_df.drop(columns='genres', axis=1, inplace=True)
movies_df

Unnamed: 0,movieId,title,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),0,0,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),0,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9738,193583,No Game No Life: Zero (2017),0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9739,193585,Flint (2017),0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9740,193587,Bungo Stray Dogs: Dead Apple (2018),0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
totalGenre = noGenres.sum()
totalGenre

Action         1828
Adventure      1263
Animation       611
Children        664
Comedy         3756
Crime          1199
Documentary     440
Drama          4361
Fantasy         779
Film-Noir        87
Horror          978
IMAX            158
Musical         334
Mystery         573
Romance        1596
Sci-Fi          980
Thriller       1894
War             382
Western         167
dtype: int64

In [32]:
movies_df

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),0,0,0,0,1,0,0,1,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),1,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9738,193583,No Game No Life: Zero (2017),0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9739,193585,Flint (2017),0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
9740,193587,Bungo Stray Dogs: Dead Apple (2018),1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
noGenres = movies_df.drop(columns=['movieId','title'], axis=1)

In [34]:
noGenres

Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
9738,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
9739,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
9740,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
movieDataFrame = pd.merge(ratingsWoUnusedDF, movies_df, on ='movieId')
movieDataFrame.drop(columns='(no genres listed)', axis=1, inplace=True)
movieDataFrame

Unnamed: 0,movieId,averageRating,totalRating,title,Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,3.939252,214,Toy Story (1995),0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,3.463303,109,Jumanji (1995),0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,3.323529,51,Grumpier Old Men (1995),0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
3,5,3.135417,48,Father of the Bride Part II (1995),0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,6,3.985149,101,Heat (1995),1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230,148626,4.120000,25,"Big Short, The (2015)",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1231,152081,4.016129,31,Zootopia (2016),1,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1232,164179,4.140000,25,Arrival (2016),0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1233,166528,4.076923,26,Rogue One: A Star Wars Story (2016),1,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [55]:
pattern = '\((\d{4})\)'
movieDataFrame['year'] = movieDataFrame.title.str.extract(pattern, expand=False)
movieDataFrame.drop(columns=['movieId','totalRating','title'], axis=1, inplace=True)


AttributeError: 'DataFrame' object has no attribute 'title'

In [56]:
movieDataFrame

Unnamed: 0,averageRating,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,year
0,3.939252,0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1995
1,3.463303,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1995
2,3.323529,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1995
3,3.135417,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1995
4,3.985149,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,1995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230,4.120000,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,2015
1231,4.016129,1,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2016
1232,4.140000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,2016
1233,4.076923,1,1,0,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,2016


In [17]:
safeMovieId = ratingsWoUnusedDF['movieId']
safeMovieId = pd.DataFrame(safeMovieId)
ratings_df = pd.merge(ratings_df, safeMovieId, on = 'movieId')
ratings_df.drop(columns='timestamp', axis=1, inplace=True)
ratings_df

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,5,1,4.0
2,7,1,4.5
3,15,1,2.5
4,17,1,4.5
...,...,...,...
66653,572,2013,1.0
66654,577,2013,3.0
66655,590,2013,2.5
66656,597,2013,3.0


In [18]:
ratings_df.sort_values(by='userId', inplace=True)

Buradan itibaren kullanıcıların her genre için verdiği ortalama rating hesaplanacak

In [19]:
ratings_df

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
10766,1,2046,4.0
10790,1,2054,4.0
10858,1,2058,5.0
10888,1,2078,5.0
...,...,...,...
34501,610,5989,3.5
39792,610,1358,4.5
17949,610,2288,5.0
36555,610,45517,3.0


In [20]:
genreCalcDF = pd.DataFrame(movieDataFrame)
genreCalcDF.drop(columns=['averageRating','totalRating','title'], axis=1, inplace=True)
genreCalcDF

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,2,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,5,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,6,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230,148626,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1231,152081,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1232,164179,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1233,166528,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0


In [21]:
genreCalcDF = pd.merge(ratings_df, genreCalcDF, on = 'movieId')

In [22]:
columns = genreCalcDF.columns
columns = columns.drop(['movieId', 'rating'])
avrGenRatPerUserDF = pd.DataFrame(columns=columns)
avrGenRatPerUserDF

Unnamed: 0,userId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western


In [27]:
for i in range(1,20):
    genreCalcDF[columns[i]] = genreCalcDF[columns[i]] * genreCalcDF['rating']
genreCalcDF.sort_values('userId', inplace=True)
genreCalcDF

Unnamed: 0,userId,movieId,rating,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,1,4.0,0.0,4.0,4.0,4.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12240,1,1291,5.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12380,1,1258,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12489,1,1377,3.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12549,1,1206,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51516,610,5903,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.5,3.5,0.0,0.0
63980,610,1343,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0
51550,610,44555,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0
66192,610,94959,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0


In [28]:
genreCalcDF.drop(columns=['movieId','rating'], axis=1, inplace=True)
genreCalcDF

Unnamed: 0,userId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,0.0,4.0,4.0,4.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12240,1,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12380,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12489,1,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12549,1,0.0,0.0,0.0,0.0,0.0,5.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51516,610,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,3.5,0.0,0.0
63980,610,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0
51550,610,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0
66192,610,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0


In [25]:
"""genreCalcDF.to_csv('/csv/x_u.csv')"""

"genreCalcDF.to_csv('/csv/x_u.csv')"

In [29]:
genreCalcDF.replace(0, np.nan, inplace=True)
genreCalcDF
grouped = genreCalcDF.groupby(['userId'])
def sum_and_divide(x):
    return x.sum() / x.notnull().sum()

result = grouped.apply(sum_and_divide)
result

Unnamed: 0_level_0,userId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,1.0,4.328947,4.455882,4.764706,4.760000,4.328571,4.361111,,4.480769,4.470588,5.000000,3.300000,,4.777778,4.400000,4.277778,4.243243,4.276596,4.500000,4.285714
2,2.0,3.954545,4.166667,,,4.200000,4.000000,,3.933333,,,3.000000,3.750000,,4.000000,4.500000,3.875000,3.888889,4.500000,3.500000
3,3.0,3.125000,3.357143,0.500000,0.500000,0.500000,0.500000,,0.500000,2.833333,,4.000000,,0.500000,,0.500000,3.250000,3.625000,0.500000,
4,4.0,3.190476,3.760000,4.000000,4.111111,3.522388,3.863636,,3.424242,4.066667,3.666667,4.333333,3.000000,4.076923,3.000000,3.378378,2.727273,3.250000,3.000000,3.250000
5,5.0,3.111111,3.250000,4.333333,4.111111,3.428571,3.727273,,3.772727,4.142857,,3.000000,3.666667,4.400000,4.000000,3.000000,2.500000,3.555556,3.333333,3.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,606.0,3.298165,3.550459,3.800000,3.560606,3.505848,3.816176,4.000000,3.828571,3.663934,3.750000,3.538462,3.115385,3.794118,3.845238,3.736000,3.570312,3.637255,3.854167,3.375000
607,607.0,3.688525,3.435897,3.000000,3.363636,3.333333,3.769231,,3.981818,3.500000,,4.136364,5.000000,3.250000,4.600000,3.541667,3.303030,4.021739,4.000000,4.000000
608,608.0,3.447115,3.293548,3.179487,2.640625,2.936214,3.643519,3.333333,3.405128,3.088235,4.333333,3.568966,4.000000,2.833333,3.683673,2.922619,3.402985,3.623596,3.593750,2.666667
609,609.0,3.090909,3.222222,3.000000,3.000000,3.400000,3.400000,,3.384615,3.000000,,4.000000,3.000000,,,3.333333,3.000000,3.250000,3.333333,4.000000


In [50]:
result.replace(np.nan, 0, inplace=True)
result.drop(columns='userId', axis=1, inplace=True)
result

Unnamed: 0_level_0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,4.328947,4.455882,4.764706,4.760000,4.328571,4.361111,0.000000,4.480769,4.470588,5.000000,3.300000,0.000000,4.777778,4.400000,4.277778,4.243243,4.276596,4.500000,4.285714
2,3.954545,4.166667,0.000000,0.000000,4.200000,4.000000,0.000000,3.933333,0.000000,0.000000,3.000000,3.750000,0.000000,4.000000,4.500000,3.875000,3.888889,4.500000,3.500000
3,3.125000,3.357143,0.500000,0.500000,0.500000,0.500000,0.000000,0.500000,2.833333,0.000000,4.000000,0.000000,0.500000,0.000000,0.500000,3.250000,3.625000,0.500000,0.000000
4,3.190476,3.760000,4.000000,4.111111,3.522388,3.863636,0.000000,3.424242,4.066667,3.666667,4.333333,3.000000,4.076923,3.000000,3.378378,2.727273,3.250000,3.000000,3.250000
5,3.111111,3.250000,4.333333,4.111111,3.428571,3.727273,0.000000,3.772727,4.142857,0.000000,3.000000,3.666667,4.400000,4.000000,3.000000,2.500000,3.555556,3.333333,3.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,3.298165,3.550459,3.800000,3.560606,3.505848,3.816176,4.000000,3.828571,3.663934,3.750000,3.538462,3.115385,3.794118,3.845238,3.736000,3.570312,3.637255,3.854167,3.375000
607,3.688525,3.435897,3.000000,3.363636,3.333333,3.769231,0.000000,3.981818,3.500000,0.000000,4.136364,5.000000,3.250000,4.600000,3.541667,3.303030,4.021739,4.000000,4.000000
608,3.447115,3.293548,3.179487,2.640625,2.936214,3.643519,3.333333,3.405128,3.088235,4.333333,3.568966,4.000000,2.833333,3.683673,2.922619,3.402985,3.623596,3.593750,2.666667
609,3.090909,3.222222,3.000000,3.000000,3.400000,3.400000,0.000000,3.384615,3.000000,0.000000,4.000000,3.000000,0.000000,0.000000,3.333333,3.000000,3.250000,3.333333,4.000000


In [52]:
result.to_csv('./csv/x_u.csv')

In [57]:
movieDataFrame.to_csv('./csv/x_m.csv')