# Rating

In [3]:
import pandas as pd
import missingno as msno
import numpy as np

In [4]:
# Load the raw ratings CSV (path is relative to the notebook's working directory)
rating = pd.read_csv(r"../data/rating.csv")

In [5]:
# Normalize the column labels: lower-case them, then trim surrounding whitespace
rating.columns = rating.columns.str.lower().str.strip()
rating.head()

Unnamed: 0,userid,movieid,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [6]:
# Total number of records
len(rating)

20000263

In [7]:
# Check that there are no null values in any column
rating.isna().sum()

userid       0
movieid      0
rating       0
timestamp    0
dtype: int64

In [8]:
# Convert `timestamp` from `object` dtype to datetime
rating["timestamp"] = pd.to_datetime(rating["timestamp"])

In [9]:
rating.head()

Unnamed: 0,userid,movieid,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [10]:
rating.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000263 entries, 0 to 20000262
Data columns (total 4 columns):
 #   Column     Dtype         
---  ------     -----         
 0   userid     int64         
 1   movieid    int64         
 2   rating     float64       
 3   timestamp  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(2)
memory usage: 610.4 MB


In [11]:
# Derive the `year` and `month` columns from the datetime accessor of `timestamp`
fecha = rating["timestamp"].dt
rating["year"] = fecha.year
rating["month"] = fecha.month
rating.head()

Unnamed: 0,userid,movieid,rating,timestamp,year,month
0,1,2,3.5,2005-04-02 23:53:47,2005,4
1,1,29,3.5,2005-04-02 23:31:16,2005,4
2,1,32,3.5,2005-04-02 23:33:39,2005,4
3,1,47,3.5,2005-04-02 23:32:07,2005,4
4,1,50,3.5,2005-04-02 23:29:40,2005,4


# DATA OPCION 1

Promedio y conteo generales, agrupados únicamente por `movieid`.

In [82]:
# Group the ratings by movie once, then derive the overall mean and the overall count
ratings_por_pelicula = rating.groupby(["movieid"])["rating"]
rating_movies_promedio = ratings_por_pelicula.mean()
rating_movies_conteo = ratings_por_pelicula.count()


In [83]:
# Per-movie mean rating, computed globally across all years
rating_movies_promedio.head()

movieid
1    3.921240
2    3.211977
3    3.151040
4    2.861393
5    3.064592
Name: rating, dtype: float64

In [84]:
# Per-movie rating count, computed globally across all years
rating_movies_conteo.head()

movieid
1    49695
2    22243
3    12735
4     2756
5    12161
Name: rating, dtype: int64

In [85]:
# Join mean and count on movieid. Both Series are named "rating", so the
# merged columns come out as rating_x (mean) and rating_y (count).
rating_promedio_conteo = rating_movies_promedio.to_frame().merge(
    rating_movies_conteo.to_frame(),
    on="movieid",
    how="left",
)

In [86]:
rating_promedio_conteo.head()

Unnamed: 0_level_0,rating_x,rating_y
movieid,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.92124,49695
2,3.211977,22243
3,3.15104,12735
4,2.861393,2756
5,3.064592,12161


In [87]:
# Give the suffixed merge columns descriptive names
nombres_columnas = {"rating_x": "rating_promedio", "rating_y": "rating_conteo"}
rating_promedio_conteo = rating_promedio_conteo.rename(columns=nombres_columnas)

In [88]:
rating_promedio_conteo.head()

Unnamed: 0_level_0,rating_promedio,rating_conteo
movieid,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.92124,49695
2,3.211977,22243
3,3.15104,12735
4,2.861393,2756
5,3.064592,12161


In [89]:
# Sanity check: the per-movie counts should add up to the total number of records
rating_promedio_conteo["rating_conteo"].sum()
# OK: matches len(rating)

np.int64(20000263)

In [90]:
rating_promedio_conteo.head()

Unnamed: 0_level_0,rating_promedio,rating_conteo
movieid,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.92124,49695
2,3.211977,22243
3,3.15104,12735
4,2.861393,2756
5,3.064592,12161


In [19]:
# Save the per-movie average/count table as an intermediate (partial) file
rating_promedio_conteo.to_csv(r"../data/partial_rating.csv")

# DATA OPCION 2
Promedio agrupado por `movieid` y `year`.
Se crea una tabla cuyas columnas son los años y cuyos valores son el promedio de rating de cada película en ese año.
Resultado -> archivo de insumo para la tabla de hechos: `partial_rating_year.csv`

In [133]:
# Mean rating for every (movieid, year) pair, kept as a one-column DataFrame
df_year_promedio = rating.groupby(["movieid", "year"])[["rating"]].mean()

In [None]:
# Keep `movieid` as the index and expose `year` as a regular column.
# The pivot further down is called with columns='year' and no explicit index,
# so it needs exactly this layout; straight out of the groupby the frame has a
# (movieid, year) MultiIndex and `year` is not a column, which makes the pivot
# fail on a fresh kernel.
# Written as a reassignment (no inplace=True) so re-running the cell yields the
# same frame.
df_year_promedio = df_year_promedio.reset_index().set_index("movieid")
df_year_promedio

Unnamed: 0_level_0,year,rating
movieid,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1996,4.132270
1,1997,3.875424
1,1998,3.885799
1,1999,3.974688
1,2000,4.142609
...,...,...
131254,2015,4.000000
131256,2015,4.000000
131258,2015,2.500000
131260,2015,3.000000


In [53]:
type(df_year_promedio)

pandas.core.frame.DataFrame

In [111]:
# Reshape to wide form: one row per index value (movieid), one column per year.
# Relies on `year` being a regular column and `movieid` being the index of
# df_year_promedio, since no explicit `index=` is passed.
# values=['rating'] (a list) yields two-level column labels: ('rating', <year>).
df_pivot = df_year_promedio.pivot(columns='year', values=['rating'])

In [112]:
df_pivot.head()

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
year,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
movieid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,,4.13227,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,4.00468,3.86338,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
2,,3.56457,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,2.956271,2.887043,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
3,,3.413843,3.291299,3.10299,2.930788,3.024691,3.204023,3.021552,2.957346,2.920536,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
4,,3.030162,3.057803,2.590909,2.71,2.744382,2.755906,2.836735,2.564103,2.585859,...,2.53125,2.310811,2.52381,2.5,2.833333,2.944444,2.545455,3.1875,2.357143,2.0
5,,3.338714,3.11471,3.090909,3.070922,2.934859,3.1,3.004651,2.803867,2.797483,...,2.765337,2.633333,2.725849,2.864662,2.8125,2.913462,2.609804,2.770936,2.897059,2.842105


In [113]:
# A movie with no ratings in a given year comes out of the pivot as NaN; store 0 instead
df_pivot = df_pivot.fillna(0)

In [114]:
df_pivot.head()

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
year,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
movieid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,0.0,4.13227,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,4.00468,3.86338,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
2,0.0,3.56457,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,2.956271,2.887043,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
3,0.0,3.413843,3.291299,3.10299,2.930788,3.024691,3.204023,3.021552,2.957346,2.920536,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
4,0.0,3.030162,3.057803,2.590909,2.71,2.744382,2.755906,2.836735,2.564103,2.585859,...,2.53125,2.310811,2.52381,2.5,2.833333,2.944444,2.545455,3.1875,2.357143,2.0
5,0.0,3.338714,3.11471,3.090909,3.070922,2.934859,3.1,3.004651,2.803867,2.797483,...,2.765337,2.633333,2.725849,2.864662,2.8125,2.913462,2.609804,2.770936,2.897059,2.842105


In [116]:
type(df_pivot)

pandas.core.frame.DataFrame

In [118]:
# Turn the movieid index back into a regular column
df_pivot = df_pivot.reset_index()

In [119]:
df_pivot.head()

Unnamed: 0_level_0,movieid,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
year,Unnamed: 1_level_1,1995,1996,1997,1998,1999,2000,2001,2002,2003,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,1,0.0,4.13227,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,4.00468,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
1,2,0.0,3.56457,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,2.956271,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
2,3,0.0,3.413843,3.291299,3.10299,2.930788,3.024691,3.204023,3.021552,2.957346,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
3,4,0.0,3.030162,3.057803,2.590909,2.71,2.744382,2.755906,2.836735,2.564103,...,2.53125,2.310811,2.52381,2.5,2.833333,2.944444,2.545455,3.1875,2.357143,2.0
4,5,0.0,3.338714,3.11471,3.090909,3.070922,2.934859,3.1,3.004651,2.803867,...,2.765337,2.633333,2.725849,2.864662,2.8125,2.913462,2.609804,2.770936,2.897059,2.842105


In [120]:
# NOTE(review): this maps 'movieid' to itself, so no label actually changes;
# the two-level column labels are flattened in a later cell instead.
df_pivot.rename(columns={'movieid': 'movieid'}, inplace=True) 

In [121]:
df_pivot.head()

Unnamed: 0_level_0,movieid,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
year,Unnamed: 1_level_1,1995,1996,1997,1998,1999,2000,2001,2002,2003,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,1,0.0,4.13227,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,4.00468,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
1,2,0.0,3.56457,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,2.956271,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
2,3,0.0,3.413843,3.291299,3.10299,2.930788,3.024691,3.204023,3.021552,2.957346,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
3,4,0.0,3.030162,3.057803,2.590909,2.71,2.744382,2.755906,2.836735,2.564103,...,2.53125,2.310811,2.52381,2.5,2.833333,2.944444,2.545455,3.1875,2.357143,2.0
4,5,0.0,3.338714,3.11471,3.090909,3.070922,2.934859,3.1,3.004651,2.803867,...,2.765337,2.633333,2.725849,2.864662,2.8125,2.913462,2.609804,2.770936,2.897059,2.842105


In [122]:
# Flatten the two-level column labels: keep the second element of each tuple
# (the year) and name the first column "movieid" explicitly.
columns = ["movieid"] + [
    etiqueta[1] if isinstance(etiqueta, tuple) else etiqueta
    for etiqueta in df_pivot.columns[1:]
]
df_pivot.columns = columns

In [123]:
# Pick the year columns. After flattening, the year labels are integers (they
# originate from `.dt.year`), so test the label's text form; filtering on
# `isinstance(c, str)` would skip every one of them and leave the loop empty.
year_cols = [c for c in df_pivot.columns if str(c).isdigit()]
# Make sure every year column is numeric; values that cannot be parsed become NaN
for col in year_cols:
    df_pivot[col] = pd.to_numeric(df_pivot[col], errors='coerce')

In [124]:
df_pivot.head()

Unnamed: 0,movieid,1995,1996,1997,1998,1999,2000,2001,2002,2003,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,1,0.0,4.13227,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,4.00468,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
1,2,0.0,3.56457,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,2.956271,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
2,3,0.0,3.413843,3.291299,3.10299,2.930788,3.024691,3.204023,3.021552,2.957346,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
3,4,0.0,3.030162,3.057803,2.590909,2.71,2.744382,2.755906,2.836735,2.564103,...,2.53125,2.310811,2.52381,2.5,2.833333,2.944444,2.545455,3.1875,2.357143,2.0
4,5,0.0,3.338714,3.11471,3.090909,3.070922,2.934859,3.1,3.004651,2.803867,...,2.765337,2.633333,2.725849,2.864662,2.8125,2.913462,2.609804,2.770936,2.897059,2.842105


In [125]:
# Display only: the result is not assigned, so df_pivot itself is unchanged
df_pivot.reset_index()

Unnamed: 0,index,movieid,1995,1996,1997,1998,1999,2000,2001,2002,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,0,1,0.0,4.132270,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
1,1,2,0.0,3.564570,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
2,2,3,0.0,3.413843,3.291299,3.102990,2.930788,3.024691,3.204023,3.021552,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
3,3,4,0.0,3.030162,3.057803,2.590909,2.710000,2.744382,2.755906,2.836735,...,2.531250,2.310811,2.523810,2.500000,2.833333,2.944444,2.545455,3.187500,2.357143,2.000000
4,4,5,0.0,3.338714,3.114710,3.090909,3.070922,2.934859,3.100000,3.004651,...,2.765337,2.633333,2.725849,2.864662,2.812500,2.913462,2.609804,2.770936,2.897059,2.842105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26739,26739,131254,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.000000
26740,26740,131256,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.000000
26741,26741,131258,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.500000
26742,26742,131260,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000


In [129]:
# Display only: the result is not assigned, so df_pivot keeps its integer index
# and `movieid` remains a regular column
df_pivot.set_index("movieid", drop=True)

Unnamed: 0_level_0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
movieid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,4.132270,3.875424,3.885799,3.974688,4.142609,4.117698,4.047855,4.004680,3.863380,...,3.680774,3.672973,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412
2,0.0,3.564570,3.457698,3.263158,3.139094,3.146533,3.136856,3.077821,2.956271,2.887043,...,2.876154,2.923704,3.059659,3.044865,3.159884,3.178733,3.201903,3.266667,3.346552,3.128049
3,0.0,3.413843,3.291299,3.102990,2.930788,3.024691,3.204023,3.021552,2.957346,2.920536,...,2.758454,2.990909,3.112245,3.102837,2.986364,3.051724,2.799451,3.064103,3.048387,2.854167
4,0.0,3.030162,3.057803,2.590909,2.710000,2.744382,2.755906,2.836735,2.564103,2.585859,...,2.531250,2.310811,2.523810,2.500000,2.833333,2.944444,2.545455,3.187500,2.357143,2.000000
5,0.0,3.338714,3.114710,3.090909,3.070922,2.934859,3.100000,3.004651,2.803867,2.797483,...,2.765337,2.633333,2.725849,2.864662,2.812500,2.913462,2.609804,2.770936,2.897059,2.842105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131254,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.000000
131256,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.000000
131258,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.500000
131260,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000


In [130]:
# Save the per-movie, per-year average table
# NOTE(review): df_pivot still carries its default integer index at this point,
# so the CSV gets an extra unnamed leading column — confirm whether
# index=False (or movieid as the index) is what downstream readers expect.

df_pivot.to_csv(r"../data/partial_rating_year.csv")

# DATA OPCION 3
Promedio y conteo agrupados por `movieid`, `userid`, `year` y `month`.
Resultado -> archivo de tipo tabla de dimensión

### Esto es para una tabla de dimensión `d_rating`
Contendrá el rating de cada película agrupado por usuario, año y mes.

In [39]:
# Mean and count of ratings per (movieid, userid, year, month) combination
claves_grupo = ["movieid", "userid", "year", "month"]
grupo = rating.groupby(claves_grupo)["rating"]
rating_movies_year_month_promedio, rating_movies_year_month_conteo = (
    grupo.mean(),
    grupo.count(),
)

In [40]:
# This should end up as a dimension table
rating_movies_year_month_promedio.head()

movieid  userid  year  month
1        3       1999  12       4.0
         6       1997  3        5.0
         8       1996  6        4.0
         10      1999  11       4.0
         11      2009  1        4.5
Name: rating, dtype: float64

In [41]:
rating_movies_year_month_conteo.head()

movieid  userid  year  month
1        3       1999  12       1
         6       1997  3        1
         8       1996  6        1
         10      1999  11       1
         11      2009  1        1
Name: rating, dtype: int64

In [42]:
# Join mean and count on the four grouping keys; the two "rating" Series
# become the rating_x (mean) and rating_y (count) columns
claves_union = ["movieid", "userid", "year", "month"]
dim_rating = rating_movies_year_month_promedio.to_frame().merge(
    rating_movies_year_month_conteo.to_frame(),
    on=claves_union,
    how="left",
)

In [43]:
dim_rating.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rating_x,rating_y
movieid,userid,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1
1,3,1999,12,4.0,1
1,6,1997,3,5.0,1
1,8,1996,6,4.0,1
1,10,1999,11,4.0,1
1,11,2009,1,4.5,1


In [44]:
# Rename the suffixed merge columns to their final names
dim_rating = dim_rating.rename(columns=dict(rating_x="promedio", rating_y="conteo"))

In [45]:
# Turn the four grouping keys from index levels into regular columns
dim_rating = dim_rating.reset_index()

In [46]:
dim_rating

Unnamed: 0,movieid,userid,year,month,promedio,conteo
0,1,3,1999,12,4.0,1
1,1,6,1997,3,5.0,1
2,1,8,1996,6,4.0,1
3,1,10,1999,11,4.0,1
4,1,11,2009,1,4.5,1
...,...,...,...,...,...,...
20000258,131254,79570,2015,3,4.0,1
20000259,131256,79570,2015,3,4.0,1
20000260,131258,28906,2015,3,2.5,1
20000261,131260,65409,2015,3,3.0,1


In [47]:
dim_rating.head()

Unnamed: 0,movieid,userid,year,month,promedio,conteo
0,1,3,1999,12,4.0,1
1,1,6,1997,3,5.0,1
2,1,8,1996,6,4.0,1
3,1,10,1999,11,4.0,1
4,1,11,2009,1,4.5,1


In [48]:
# Surrogate key: consecutive integers 1..len(dim_rating), in the current row order
dim_rating["ratingid"] = range(1, len(dim_rating) + 1)

In [49]:
# Use the surrogate key as the index of the table
dim_rating = dim_rating.set_index("ratingid")

In [50]:
dim_rating.head()

Unnamed: 0_level_0,movieid,userid,year,month,promedio,conteo
ratingid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,3,1999,12,4.0,1
2,1,6,1997,3,5.0,1
3,1,8,1996,6,4.0,1
4,1,10,1999,11,4.0,1
5,1,11,2009,1,4.5,1


In [51]:
# Save the dimension table
dim_rating.to_csv(r"../data/d_rating.csv")