# Internet Movie Database (IMDb)

Project Description: Predicting the IMDb rating of a movie by building a regression model on IMDb dataset

In [67]:
import pandas as pd
from pandas import *
import numpy as np
from sklearn import preprocessing

Loading the pre-processed data set

In [68]:
imdb = pd.read_csv("/Users/Sahil/Documents/masters/ml_project/IMDb_dataset1.csv")
imdb.shape
imdb.head()

Unnamed: 0,color,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_1_facebook_likes,num_voted_users,num_user_for_reviews,language,country,...,War,Documentary,Sport,Crime,Horror,Mystery,Sci_Fi,Thriller,Animation,Biography
0,Color,723.0,178.0,0.0,855.0,1000.0,886204,3054.0,English,USA,...,0,0,0,0,0,0,1,0,0,0
1,Color,302.0,169.0,563.0,1000.0,40000.0,471220,1238.0,English,USA,...,0,0,0,0,0,0,0,0,0,0
2,Color,602.0,148.0,0.0,161.0,11000.0,275868,994.0,English,UK,...,0,0,0,0,0,0,0,1,0,0
3,Color,813.0,164.0,22000.0,23000.0,27000.0,1144337,2701.0,English,USA,...,0,0,0,0,0,0,0,1,0,0
4,Color,462.0,132.0,475.0,530.0,640.0,212204,738.0,English,USA,...,0,0,0,0,0,0,1,0,0,0


In [69]:
y = imdb['imdb_score']
del imdb['imdb_score']

Binarizing/Vectorizing the columns with text data

In [70]:
imdb = pd.get_dummies(imdb)
imdb.shape
imdb.head()

Unnamed: 0,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_1_facebook_likes,num_voted_users,num_user_for_reviews,actor_2_facebook_likes,movie_facebook_likes,Adventure,...,content_rating_Passed,content_rating_R,content_rating_TV-14,content_rating_TV-G,content_rating_TV-MA,content_rating_TV-PG,content_rating_TV-Y,content_rating_TV-Y7,content_rating_Unrated,content_rating_X
0,723.0,178.0,0.0,855.0,1000.0,886204,3054.0,936.0,33000,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,302.0,169.0,563.0,1000.0,40000.0,471220,1238.0,5000.0,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,602.0,148.0,0.0,161.0,11000.0,275868,994.0,393.0,85000,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,813.0,164.0,22000.0,23000.0,27000.0,1144337,2701.0,23000.0,164000,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,462.0,132.0,475.0,530.0,640.0,212204,738.0,632.0,24000,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Feature scaling

In [71]:
imdb['num_critic_for_reviews'] = preprocessing.scale(imdb['num_critic_for_reviews'])
imdb['duration'] = preprocessing.scale(imdb['duration'])
imdb['director_facebook_likes'] = preprocessing.scale(imdb['director_facebook_likes'])
imdb['actor_1_facebook_likes'] = preprocessing.scale(imdb['actor_1_facebook_likes'])
imdb['actor_2_facebook_likes'] = preprocessing.scale(imdb['actor_2_facebook_likes'])
imdb['actor_3_facebook_likes'] = preprocessing.scale(imdb['actor_3_facebook_likes'])
imdb['movie_facebook_likes'] = preprocessing.scale(imdb['movie_facebook_likes'])
imdb['num_user_for_reviews'] = preprocessing.scale(imdb['num_user_for_reviews'])
imdb['num_voted_users'] = preprocessing.scale(imdb['num_voted_users'])



Displaying the number of instances and features of the dataset i.e. shape

In [72]:
imdb.shape

(5015, 160)

### Performance of a Baseline Model i.e. performance of predicting the mean of the target

In [73]:
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
X_train,X_test,Y_train,Y_test = train_test_split(imdb,y,train_size=0.67,random_state=2)

In [74]:
dummy = DummyRegressor()
dummy.fit(X_train,Y_train)
pred = dummy.predict(X_test)
print("Mean squared error: %0.4f"%mean_squared_error(Y_test,pred))
print("R2 score: %0.4f"%r2_score(Y_test,pred))

Mean squared error: 1.2317
R2 score: -0.0012


### Models performance evaluation based on train-test split & cross-validation
### Performance Metrics: 1) Mean Squared Error 2) Root Mean Squared Error 3) R2 Score

In [75]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor

Regression models with default parameter settings

In [76]:
DR = DecisionTreeRegressor()
LA_R = Lasso()
R_R = Ridge()

In [77]:
score1 = cross_val_score(DR,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
sc = cross_val_score(DR,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score1)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score1))))
print("R2 score: %0.4f"%np.mean(np.abs(sc)))

Mean squared error: 1.2384
Root Mean Squared Error: 1.1128
R2 score: 0.1411


In [78]:
score = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 1.2765
Root Mean Squared Error: 1.1298
R2 score: 0.0038


In [79]:
score = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 0.7364
Root Mean Squared Error: 0.8582
R2 score: 0.4230


 Lasso & Ridge Regression with 'alpha=25'

Decision Tree Regressor with different maximum depth values

In [80]:
LA_R = Lasso(alpha=25)
R_R = Ridge(alpha=25)

In [81]:
for i in range(2,10):
    DR = DecisionTreeRegressor(max_depth=i)
    score1 = cross_val_score(DR,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
    sc = cross_val_score(DR,X_train,Y_train,cv=10,scoring="r2")
    print("Mean squared error: %0.4f"%np.mean(np.abs(score1)))
    print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score1))))
    print("R2 score: %0.4f"%np.mean(np.abs(sc)))
    print("\n")

Mean squared error: 0.9514
Root Mean Squared Error: 0.9754
R2 score: 0.2496


Mean squared error: 0.8922
Root Mean Squared Error: 0.9446
R2 score: 0.2980


Mean squared error: 0.8719
Root Mean Squared Error: 0.9337
R2 score: 0.3140


Mean squared error: 0.8377
Root Mean Squared Error: 0.9153
R2 score: 0.3422


Mean squared error: 0.8502
Root Mean Squared Error: 0.9221
R2 score: 0.3331


Mean squared error: 0.8860
Root Mean Squared Error: 0.9413
R2 score: 0.2981


Mean squared error: 0.9095
Root Mean Squared Error: 0.9537
R2 score: 0.2803


Mean squared error: 0.9380
Root Mean Squared Error: 0.9685
R2 score: 0.2649




In [82]:
score = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 1.2765
Root Mean Squared Error: 1.1298
R2 score: 0.0038


In [83]:
score = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 0.7409
Root Mean Squared Error: 0.8608
R2 score: 0.4195


Lasso & Ridge Regression with 'alpha=50'

In [84]:
LA_R = Lasso(alpha=50)
R_R = Ridge(alpha=50)

In [85]:
score = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 1.2765
Root Mean Squared Error: 1.1298
R2 score: 0.0038


In [86]:
score = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 0.7493
Root Mean Squared Error: 0.8656
R2 score: 0.4128


Lasso & Ridge Regression with 'alpha=100'

In [87]:
LA_R = Lasso(alpha=100)
R_R = Ridge(alpha=100)

In [88]:
score = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LA_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 1.2765
Root Mean Squared Error: 1.1298
R2 score: 0.0038


In [89]:
score = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(R_R,X_train,Y_train,cv=10,scoring="r2")
print("Mean squared error: %0.4f"%np.mean(np.abs(score)))
print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print("R2 score: %0.4f"%np.mean(np.abs(score1)))

Mean squared error: 0.7623
Root Mean Squared Error: 0.8731
R2 score: 0.4024


Chosen model's performance when it is trained on the full training data and tested on the test data i.e. using Ridge Regression with default parameter settings

In [90]:
R_R = Ridge()
R_reg = R_R.fit(X_train,Y_train)
pred = R_reg.predict(X_test)
print ("Mean Squared Error: %0.4f"%mean_squared_error(Y_test,pred))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(mean_squared_error(Y_test,pred)))
print ("R2 Score: %0.4f"%r2_score(Y_test,pred))

Mean Squared Error: 0.7143
Root Mean Squared Error: 0.8452
R2 Score: 0.4194


## Models performance evaluation based on cross-validation

Regression models with default parameter settings

In [91]:
DR = DecisionTreeRegressor()
LR = Lasso()
RR = Ridge()

In [92]:
score = cross_val_score(DR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(DR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(score1))

Mean Squared Error: 1.2546
Root Mean Squared Error: 1.1201
R2 Score: 0.0102


In [93]:
score = cross_val_score(LR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(np.abs(score1)))

Mean Squared Error: 1.2628
Root Mean Squared Error: 1.1237
R2 Score: 0.0080


In [94]:
score = cross_val_score(RR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(RR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(score1))

Mean Squared Error: 0.7423
Root Mean Squared Error: 0.8616
R2 Score: 0.4143


 Lasso & Ridge Regression with 'alpha=25'

Decision Tree Regressor with different maximum depth values

In [95]:
LR = Lasso(alpha=25)
RR = Ridge(alpha=25)

In [96]:
for i in range(2,10):
    DR = DecisionTreeRegressor(max_depth=i)
    score1 = cross_val_score(DR,imdb,y,cv=10,scoring="neg_mean_squared_error")
    sc = cross_val_score(DR,imdb,y,cv=10,scoring="r2")
    print("Mean squared error: %0.4f"%np.mean(np.abs(score1)))
    print("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score1))))
    print("R2 score: %0.4f"%np.mean(np.abs(sc)))
    print("\n")

Mean squared error: 0.9785
Root Mean Squared Error: 0.9892
R2 score: 0.2336


Mean squared error: 0.9398
Root Mean Squared Error: 0.9694
R2 score: 0.2749


Mean squared error: 0.9018
Root Mean Squared Error: 0.9496
R2 score: 0.2952


Mean squared error: 0.8874
Root Mean Squared Error: 0.9420
R2 score: 0.3259


Mean squared error: 0.8705
Root Mean Squared Error: 0.9330
R2 score: 0.3316


Mean squared error: 0.8543
Root Mean Squared Error: 0.9243
R2 score: 0.3370


Mean squared error: 0.8783
Root Mean Squared Error: 0.9372
R2 score: 0.3078


Mean squared error: 0.9033
Root Mean Squared Error: 0.9504
R2 score: 0.3109




In [97]:
score = cross_val_score(LR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(np.abs(score1)))

Mean Squared Error: 1.2628
Root Mean Squared Error: 1.1237
R2 Score: 0.0080


In [98]:
score = cross_val_score(RR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(RR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(score1))

Mean Squared Error: 0.7454
Root Mean Squared Error: 0.8634
R2 Score: 0.4111


Lasso & Ridge Regression with 'alpha=50'

In [99]:
LR = Lasso(alpha=50)
RR = Ridge(alpha=50)

In [100]:
score = cross_val_score(LR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(np.abs(score1)))

Mean Squared Error: 1.2628
Root Mean Squared Error: 1.1237
R2 Score: 0.0080


In [101]:
score = cross_val_score(RR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(RR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(score1))

Mean Squared Error: 0.7539
Root Mean Squared Error: 0.8683
R2 Score: 0.4046


Lasso & Ridge Regression with 'alpha=100'

In [102]:
LR = Lasso(alpha=100)
RR = Ridge(alpha=100)

In [103]:
score = cross_val_score(LR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(LR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(np.abs(score1)))

Mean Squared Error: 1.2628
Root Mean Squared Error: 1.1237
R2 Score: 0.0080


In [104]:
score = cross_val_score(RR,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(RR,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(score1))

Mean Squared Error: 0.7667
Root Mean Squared Error: 0.8756
R2 Score: 0.3952


Chosen model's cross validation performance i.e. using Ridge Regression with default parameter setting

In [105]:
R_R = Ridge()
score = cross_val_score(R_R,imdb,y,cv=10,scoring="neg_mean_squared_error")
score1 = cross_val_score(R_R,imdb,y,cv=10,scoring="r2")
print ("Mean Squared Error: %0.4f"%np.mean(np.abs(score)))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(np.mean(np.abs(score))))
print ("R2 Score: %0.4f"%np.mean(np.abs(score1)))

Mean Squared Error: 0.7423
Root Mean Squared Error: 0.8616
R2 Score: 0.4143


### Important Features

As we can see from above conclusions, Ridge Regression with default parameter settings has the best performance using train-test split with cross-validation

In [106]:
R_R = Ridge()
R_reg = R_R.fit(X_train,Y_train)
pred = R_reg.predict(X_test)
print (Y_test)
print (pred)
print ("Mean Squared Error: %0.4f"%mean_squared_error(Y_test,pred))
print ("Root Mean Squared Error: %0.4f"%np.sqrt(mean_squared_error(Y_test,pred)))
print ("R2 Score: %0.4f"%r2_score(Y_test,pred))

4142    8.2
700     6.8
1202    7.5
4336    5.6
3566    7.3
549     5.9
3629    4.7
3647    8.4
3197    6.9
1783    5.4
3095    7.0
2401    8.3
1257    4.7
3574    7.8
186     5.5
3549    7.3
4324    6.4
1367    5.7
4182    7.6
4308    7.3
4073    6.9
3183    7.1
4425    3.3
119     8.3
2318    8.4
4514    6.6
3782    4.6
1736    7.7
4325    5.2
500     6.0
       ... 
3997    4.7
723     6.6
430     6.1
2739    4.5
4743    3.5
4834    4.3
1296    4.8
1744    8.0
4018    8.1
4717    7.3
4252    8.1
660     7.2
335     6.7
4382    7.1
76      5.8
4569    6.8
3288    4.8
1691    6.7
2080    6.2
1392    4.5
13      6.5
3796    5.1
3841    6.0
4643    7.2
2270    7.5
4148    7.0
3527    5.3
4672    5.3
846     5.1
108     6.6
Name: imdb_score, dtype: float64
[ 6.22796098  6.16020381  6.83532905 ...,  6.34224503  6.27659172
  6.26348782]
Mean Squared Error: 0.7143
Root Mean Squared Error: 0.8452
R2 Score: 0.4194


In [107]:
coefficients = R_reg.coef_

In [108]:
features = imdb.columns.values

In [109]:
matrix_negative = np.vstack([features,coefficients]).T

In [110]:
matrix_positive = matrix_negative[matrix_negative[:,1].argsort()[::-1]]

Top positive features and their weights

In [111]:
matrix_positive

array([['country_Kyrgyzstan', 1.2547050098532155],
       ['language_Korean', 1.1509457726128334],
       ['content_rating_TV-MA', 1.0913746710299523],
       ['Documentary', 1.0105998530081195],
       ['country_Iran', 0.828704005540298],
       ['content_rating_TV-G', 0.8284871859390807],
       ['language_Telugu', 0.7795143021509318],
       ['content_rating_TV-PG', 0.5963084354328212],
       ['content_rating_TV-Y7', 0.5786589536666419],
       ['language_Japanese', 0.5713429311425497],
       ['language_None', 0.5407355460875604],
       ['content_rating_TV-Y', 0.5196393792102163],
       ['language_Kannada', 0.5187087417391961],
       ['num_voted_users', 0.5141086187314063],
       ['country_Libya', 0.5003527591402297],
       ['Drama', 0.49578718735091165],
       ['Animation', 0.4849886509577091],
       ['language_Danish', 0.4123181690949352],
       ['content_rating_TV-14', 0.3987449664973493],
       ['country_Argentina', 0.3851741049373123],
       ['content_rating_X', 0.3

Top negative features and their weights

In [112]:
matrix_negative

array([['num_critic_for_reviews', 0.24540579371734844],
       ['duration', 0.12718088108709036],
       ['director_facebook_likes', 0.021384894635771052],
       ['actor_3_facebook_likes', -0.05856280797849627],
       ['actor_1_facebook_likes', 0.027645812341205996],
       ['num_voted_users', 0.5141086187314063],
       ['num_user_for_reviews', -0.146938126325126],
       ['actor_2_facebook_likes', -0.0030331001332927696],
       ['movie_facebook_likes', -0.0810974832082851],
       ['Adventure', 0.0540495013405],
       ['Action', -0.23242024277036716],
       ['Drama', 0.49578718735091165],
       ['Fantasy', -0.018399929735174257],
       ['Romance', 0.01871435733358147],
       ['Comedy', -0.06319939106595455],
       ['Family', -0.20776169857903548],
       ['Musical', 0.11578019795233165],
       ['History', 0.045009660912052565],
       ['War', 0.10193227857850963],
       ['Documentary', 1.0105998530081195],
       ['Sport', 0.18644835668033208],
       ['Crime', 0.084321062

Conclusion

Seeing the overall results, it can be deduced that, Ridge Regression with default parameter setting works better than Decision Tree Regressor and Lasso Regression