In [1]:
import pyspark 

In [2]:
spark  = pyspark.sql.SparkSession.builder.getOrCreate()
sc = spark.sparkContext

In [3]:
ratings = spark.read.json('data/ratings.json')

In [4]:
ratings.persist()

DataFrame[movie_id: bigint, rating: bigint, timestamp: double, user_id: bigint]

In [5]:
ratings.show(5)

+--------+------+------------+-------+
|movie_id|rating|   timestamp|user_id|
+--------+------+------------+-------+
|     858|     4|9.56678732E8|   6040|
|    2384|     4|9.56678754E8|   6040|
|     593|     5|9.56678754E8|   6040|
|    1961|     4|9.56678777E8|   6040|
|    1419|     3|9.56678856E8|   6040|
+--------+------+------------+-------+
only showing top 5 rows



In [6]:
import pandas as pd 
movies = pd.read_csv('data/movies.dat', sep='::', engine='python', header=None)
len(movies)

3883

In [7]:
users = pd.read_csv('data/users.dat', sep='::', engine='python', header=None)
users.head()

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [8]:
users = users.rename({0:'user_id', 
              1:'gender', 
              2:'min_age', 
              3:'occupation', 
              4:'zipcode'}, 
             axis=1)
users.min_age.value_counts()

25    2096
35    1193
18    1103
45     550
50     496
56     380
1      222
Name: min_age, dtype: int64

In [9]:
requests = spark.read.json('data/requests.json')

In [10]:
requests.persist()

DataFrame[movie_id: bigint, rating: double, timestamp: double, user_id: bigint]

In [11]:
requests.count()

280260

In [12]:
ratings_df = ratings.toPandas()

In [13]:
ratings_df.head()

Unnamed: 0,movie_id,rating,timestamp,user_id
0,858,4,956678732.0,6040
1,2384,4,956678754.0,6040
2,593,5,956678754.0,6040
3,1961,4,956678777.0,6040
4,1419,3,956678856.0,6040


In [14]:
from pyspark.ml.recommendation import ALSModel, ALS

In [15]:
als = ALS(
    rank=11,
    userCol='user_id',
    itemCol='movie_id',
    ratingCol='rating'
)

In [16]:
als_model = als.fit(ratings)

In [17]:
preds = als_model.transform(ratings)

In [18]:
request_preds = als_model.transform(requests)

In [19]:
nan_df = request_preds.toPandas()

In [20]:
nan_df.head()

Unnamed: 0,movie_id,rating,timestamp,user_id,prediction
0,148,,977959026.0,53,
1,148,,976559602.0,4169,3.100361
2,148,,989024856.0,5333,2.316402
3,148,,977005381.0,4387,2.392085
4,148,,966907208.0,3539,2.722387


In [21]:
import numpy as np
nan_df = nan_df[nan_df['prediction'].isnull()]

In [22]:
nan_df.head()

Unnamed: 0,movie_id,rating,timestamp,user_id,prediction
0,148,,977959000.0,53,
6,148,,976841600.0,216,
7,148,,976191200.0,482,
9,148,,1026978000.0,424,
14,463,,978242800.0,26,


In [23]:
len(nan_df)

95628

In [24]:
meta_df = pd.read_csv('data/movies_metadata.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [25]:
meta_df[meta_df.id.str.contains('-')==True]
#drop things that got shifted 

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
19730,- Written by Ørnås,0.065736,/ff9qCepilowshEtG2GYWwzt2bs4.jpg,"[{'name': 'Carousel Productions', 'id': 11176}...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",1997-08-20,0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,...,1,,,,,,,,,
29503,Rune Balot goes to a casino connected to the ...,1.931659,/zV8bHuSL6WXoD6FWogP9j4x80bL.jpg,"[{'name': 'Aniplex', 'id': 2883}, {'name': 'Go...","[{'iso_3166_1': 'US', 'name': 'United States o...",2012-09-29,0,68.0,"[{'iso_639_1': 'ja', 'name': '日本語'}]",Released,...,12,,,,,,,,,
35587,Avalanche Sharks tells the story of a bikini ...,2.185485,/zaSf5OG7V8X8gqFvly88zDdRm46.jpg,"[{'name': 'Odyssey Media', 'id': 17161}, {'nam...","[{'iso_3166_1': 'CA', 'name': 'Canada'}]",2014-01-01,0,82.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,...,22,,,,,,,,,


In [26]:
bad_ids = ['1997-08-20', '2012-09-29', '2014-01-01'] 

In [27]:
meta_df = meta_df[~meta_df['id'].isin(bad_ids)]
meta_df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [28]:
meta_df['id'] = meta_df['id'].astype(int)

In [63]:
all_training_data_df = ratings_df.merge(meta_df, how='left', left_on='movie_id', right_on='id')

In [64]:
all_training_data_df = all_training_data_df.merge(users, how='left', left_on='user_id', right_on='user_id')

In [65]:
all_training_data_df.head().T

Unnamed: 0,0,1,2,3,4
movie_id,858,2384,593,1961,1419
rating,4,4,5,4,3
timestamp,9.56679e+08,9.56679e+08,9.56679e+08,9.56679e+08,9.56679e+08
user_id,6040,6040,6040,6040,6040
adult,False,,False,False,False
belongs_to_collection,,,,,
budget,21000000,,0,1500000,0
genres,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,"[{'id': 18, 'name': 'Drama'}, {'id': 878, 'nam...","[{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam..."
homepage,,,,http://www.dhentertainment.com/projects/1.my-n...,http://www.vollidiot-derfilm.de/
id,858,,593,1961,1419


In [29]:
all_data_df = nan_df.merge(meta_df, how='left', left_on='movie_id', right_on='id')

In [30]:
all_data_df.head()

Unnamed: 0,movie_id,rating,timestamp,user_id,prediction,adult,belongs_to_collection,budget,genres,homepage,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,148,,977959000.0,53,,False,,5000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,2005-12-15,0.0,112.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,The Secret Life of Words,False,6.8,52.0
1,148,,976841600.0,216,,False,,5000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,2005-12-15,0.0,112.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,The Secret Life of Words,False,6.8,52.0
2,148,,976191200.0,482,,False,,5000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,2005-12-15,0.0,112.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,The Secret Life of Words,False,6.8,52.0
3,148,,1026978000.0,424,,False,,5000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,2005-12-15,0.0,112.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,The Secret Life of Words,False,6.8,52.0
4,463,,978242800.0,26,,,,,,,...,,,,,,,,,,


In [31]:
all_data_df = nan_df.merge(meta_df, how='left', left_on='movie_id', right_on='id')

In [32]:
all_data_df = all_data_df.merge(users, how='left', left_on='user_id', right_on='user_id')
all_data_df.head(2)

Unnamed: 0,movie_id,rating,timestamp,user_id,prediction,adult,belongs_to_collection,budget,genres,homepage,...,status,tagline,title,video,vote_average,vote_count,gender,min_age,occupation,zipcode
0,148,,977959026.0,53,,False,,5000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,Released,,The Secret Life of Words,False,6.8,52.0,M,25,0,96931
1,148,,976841639.0,216,,False,,5000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,Released,,The Secret Life of Words,False,6.8,52.0,M,45,13,52761


In [35]:
all_data_df.head(1)

Unnamed: 0,movie_id,rating,timestamp,user_id,prediction,adult,belongs_to_collection,budget,genres,homepage,...,status,tagline,title,video,vote_average,vote_count,gender,min_age,occupation,zipcode
0,148,,977959026.0,53,,False,,5000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,...,Released,,The Secret Life of Words,False,6.8,52.0,M,25,0,96931


In [36]:
X = all_data_df.filter(['zipcode','occupation','min_age','gender','vote_count', 'vote_average', 
                'runtime', 'revenue', 'release_date', 'popularity', 
               'budget', 'adult', 'user_id', 'movie_id'], axis=1)

In [37]:
y = all_data_df.filter(['prediction'], axis=1)

In [38]:
from keras.models import Sequential
from keras.layers import Dense 
from keras.optimizers import Adam 

Using TensorFlow backend.


In [39]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [40]:
gender_dict = {'M':0, 'F':1}

In [41]:
X['gender'] = X['gender'].replace(gender_dict)

In [42]:
adult_dict = {'True':True, 'False':False}

In [43]:
X['adult'] = X['adult'].replace(adult_dict)
type(X['adult'][0])

bool

In [44]:
X['budget'] = X['budget'].astype(float)

In [45]:
X.drop('zipcode', axis=1, inplace=True)

In [46]:
X['release_date'] = pd.DatetimeIndex(X['release_date']).astype(np.int64)

In [47]:
X['popularity'].astype(np.float64, copy=False)
type(X['popularity'][0])

float

In [48]:
#one-hot encode occupation
enc_cols = X['occupation'].values.reshape(-1, 1)
encoder = OneHotEncoder().fit(enc_cols)
encoder.get_feature_names(['occupation'])

ohe = pd.DataFrame(encoder.transform(enc_cols).toarray(),
                   columns=encoder.get_feature_names(['occupation']))

X = pd.concat([X.drop(['occupation'], axis=1), ohe], axis=1, )

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [49]:
X.head().T

Unnamed: 0,0,1,2,3,4
min_age,25,45,25,25,25.0
gender,0,0,0,0,0.0
vote_count,52,52,52,52,
vote_average,6.8,6.8,6.8,6.8,
runtime,112,112,112,112,
revenue,0,0,0,0,
release_date,1134604800000000000,1134604800000000000,1134604800000000000,1134604800000000000,-9.223372036854778e+18
popularity,12.7756,12.7756,12.7756,12.7756,
budget,5e+06,5e+06,5e+06,5e+06,
adult,False,False,False,False,


In [50]:
X.drop(['occupation_0.0'], axis=1, inplace=True)

In [51]:
X.rename({'occupation_1.0':'academic_educator', 
          'occupation_2.0':'artist',
         'occupation_3.0':'clerical_admin',
         'occupation_4.0':'coll_grad_student',
         'occupation_5.0':'cust_service',
         'occupation_6.0':'doctor',
         'occupation_7.0':'exec',
         'occupation_8.0':'farmer',
         'occupation_9.0':'homemaker',
         'occupation_10.0':'young_student',
         'occupation_11.0':'lawyer',
         'occupation_12.0':'programmer',
         'occupation_13.0':'retired',
         'occupation_14.0':'sales_mkting',
          'occupation_15.0':'scientist',
         'occupation_16.0':'self_employed',
         'occupation_17.0':'tech_eng',
         'occupation_18.0':'tradesman',
          'occupation_19.0':'unemployed',
          'occupation_20.0':'writer',}, axis=1, inplace=True)


In [52]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95628 entries, 0 to 95627
Data columns (total 32 columns):
min_age              95628 non-null int64
gender               95628 non-null int64
vote_count           48000 non-null float64
vote_average         48000 non-null float64
runtime              48000 non-null float64
revenue              48000 non-null float64
release_date         95628 non-null int64
popularity           48000 non-null object
budget               48000 non-null float64
adult                48000 non-null object
user_id              95628 non-null int64
movie_id             95628 non-null int64
academic_educator    95628 non-null float64
artist               95628 non-null float64
clerical_admin       95628 non-null float64
coll_grad_student    95628 non-null float64
cust_service         95628 non-null float64
doctor               95628 non-null float64
exec                 95628 non-null float64
farmer               95628 non-null float64
homemaker            95

In [53]:
#X_3 will have more rows to train the model but we'll only take the predictions of the ones we still need
X_3 = X.drop(['vote_count', 'vote_average', 'runtime', 
                         'revenue', 'popularity', 'budget', 'adult'],axis=1)

In [54]:
X_2 = X.dropna()

In [55]:
X_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 48000 entries, 0 to 95491
Data columns (total 32 columns):
min_age              48000 non-null int64
gender               48000 non-null int64
vote_count           48000 non-null float64
vote_average         48000 non-null float64
runtime              48000 non-null float64
revenue              48000 non-null float64
release_date         48000 non-null int64
popularity           48000 non-null object
budget               48000 non-null float64
adult                48000 non-null object
user_id              48000 non-null int64
movie_id             48000 non-null int64
academic_educator    48000 non-null float64
artist               48000 non-null float64
clerical_admin       48000 non-null float64
coll_grad_student    48000 non-null float64
cust_service         48000 non-null float64
doctor               48000 non-null float64
exec                 48000 non-null float64
farmer               48000 non-null float64
homemaker            48

In [56]:
X_3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95628 entries, 0 to 95627
Data columns (total 25 columns):
min_age              95628 non-null int64
gender               95628 non-null int64
release_date         95628 non-null int64
user_id              95628 non-null int64
movie_id             95628 non-null int64
academic_educator    95628 non-null float64
artist               95628 non-null float64
clerical_admin       95628 non-null float64
coll_grad_student    95628 non-null float64
cust_service         95628 non-null float64
doctor               95628 non-null float64
exec                 95628 non-null float64
farmer               95628 non-null float64
homemaker            95628 non-null float64
young_student        95628 non-null float64
lawyer               95628 non-null float64
programmer           95628 non-null float64
retired              95628 non-null float64
sales_mkting         95628 non-null float64
scientist            95628 non-null float64
self_employed        

In [57]:
print(f'There are {95628-48000} predictions for our third model to make.')

There are 47628 predictions for our third model to make.


In [66]:
all_training_data_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 719949 entries, 0 to 719948
Data columns (total 32 columns):
movie_id                 719949 non-null int64
rating                   719949 non-null int64
timestamp                719949 non-null float64
user_id                  719949 non-null int64
adult                    361823 non-null object
belongs_to_collection    88134 non-null object
budget                   361823 non-null object
genres                   361823 non-null object
homepage                 90115 non-null object
id                       361823 non-null float64
imdb_id                  361823 non-null object
original_language        361823 non-null object
original_title           361823 non-null object
overview                 360854 non-null object
popularity               361823 non-null object
poster_path              361823 non-null object
production_companies     361823 non-null object
production_countries     361823 non-null object
release_date             361

In [89]:
X_train = all_training_data_df.filter(['zipcode','occupation','min_age','gender','vote_count', 'vote_average', 
                'runtime', 'revenue', 'release_date', 'popularity', 
               'budget', 'adult', 'user_id', 'movie_id', 'rating'], axis=1)

In [90]:
X_train['gender'] = X_train['gender'].replace(gender_dict)

In [91]:
X_train['adult'] = X_train['adult'].replace(adult_dict)
type(X_train['adult'][0])

bool

In [92]:
X_train['budget'] = X_train['budget'].astype(float)

In [93]:
X_train.drop('zipcode', axis=1, inplace=True)

In [94]:
X_train['release_date'] = pd.DatetimeIndex(X_train['release_date']).astype(np.int64)

In [95]:
X_train['popularity'].astype(np.float64, copy=False)
type(X_train['popularity'][0])

float

In [96]:
#one-hot encode occupation
enc_cols = X_train['occupation'].values.reshape(-1, 1)
encoder = OneHotEncoder().fit(enc_cols)
encoder.get_feature_names(['occupation'])

ohe = pd.DataFrame(encoder.transform(enc_cols).toarray(),
                   columns=encoder.get_feature_names(['occupation']))

X_train = pd.concat([X_train.drop(['occupation'], axis=1), ohe], axis=1, )

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [97]:
X_train.head().T

Unnamed: 0,0,1,2,3,4
min_age,25,25.0,25,25,25
gender,0,0.0,0,0,0
vote_count,630,,364,107,25
vote_average,6.5,,7.7,5.9,4.3
runtime,105,,167,86,102
revenue,2.278e+08,,0,173066,0
release_date,740880000000000000,-9.223372036854778e+18,69897600000000000,1191196800000000000,1176249600000000000
popularity,10.2349,,11.0598,8.70879,1.99742
budget,2.1e+07,,0,1.5e+06,0
adult,False,,False,False,False


In [98]:
X_train.drop(['occupation_0.0'], axis=1, inplace=True)

In [99]:
X_train.rename({'occupation_1.0':'academic_educator', 
          'occupation_2.0':'artist',
         'occupation_3.0':'clerical_admin',
         'occupation_4.0':'coll_grad_student',
         'occupation_5.0':'cust_service',
         'occupation_6.0':'doctor',
         'occupation_7.0':'exec',
         'occupation_8.0':'farmer',
         'occupation_9.0':'homemaker',
         'occupation_10.0':'young_student',
         'occupation_11.0':'lawyer',
         'occupation_12.0':'programmer',
         'occupation_13.0':'retired',
         'occupation_14.0':'sales_mkting',
          'occupation_15.0':'scientist',
         'occupation_16.0':'self_employed',
         'occupation_17.0':'tech_eng',
         'occupation_18.0':'tradesman',
          'occupation_19.0':'unemployed',
          'occupation_20.0':'writer',}, axis=1, inplace=True)


In [100]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 719949 entries, 0 to 719948
Data columns (total 33 columns):
min_age              719949 non-null int64
gender               719949 non-null int64
vote_count           361823 non-null float64
vote_average         361823 non-null float64
runtime              361823 non-null float64
revenue              361823 non-null float64
release_date         719949 non-null int64
popularity           361823 non-null object
budget               361823 non-null float64
adult                361823 non-null object
user_id              719949 non-null int64
movie_id             719949 non-null int64
rating               719949 non-null int64
academic_educator    719949 non-null float64
artist               719949 non-null float64
clerical_admin       719949 non-null float64
coll_grad_student    719949 non-null float64
cust_service         719949 non-null float64
doctor               719949 non-null float64
exec                 719949 non-null float64
far

In [102]:
#X_3 will have more rows to train the model but we'll only take the predictions of the ones we still need
X_3_train = X_train.drop(['vote_count', 'vote_average', 'runtime', 
                         'revenue', 'popularity', 'budget', 'adult', 'rating'],axis=1)
y_3_train = X_train.filter(['rating'], axis=1)

In [109]:
X_2_train = X_train.dropna()
y_2_train = X_2_train.filter(['rating'], axis=1)
X_2_train = X_2_train.drop(['rating'], axis=1)

In [110]:
X_2_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 361823 entries, 0 to 719948
Data columns (total 32 columns):
min_age              361823 non-null int64
gender               361823 non-null int64
vote_count           361823 non-null float64
vote_average         361823 non-null float64
runtime              361823 non-null float64
revenue              361823 non-null float64
release_date         361823 non-null int64
popularity           361823 non-null object
budget               361823 non-null float64
adult                361823 non-null object
user_id              361823 non-null int64
movie_id             361823 non-null int64
academic_educator    361823 non-null float64
artist               361823 non-null float64
clerical_admin       361823 non-null float64
coll_grad_student    361823 non-null float64
cust_service         361823 non-null float64
doctor               361823 non-null float64
exec                 361823 non-null float64
farmer               361823 non-null float64
h

In [111]:
X_3_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 719949 entries, 0 to 719948
Data columns (total 25 columns):
min_age              719949 non-null int64
gender               719949 non-null int64
release_date         719949 non-null int64
user_id              719949 non-null int64
movie_id             719949 non-null int64
academic_educator    719949 non-null float64
artist               719949 non-null float64
clerical_admin       719949 non-null float64
coll_grad_student    719949 non-null float64
cust_service         719949 non-null float64
doctor               719949 non-null float64
exec                 719949 non-null float64
farmer               719949 non-null float64
homemaker            719949 non-null float64
young_student        719949 non-null float64
lawyer               719949 non-null float64
programmer           719949 non-null float64
retired              719949 non-null float64
sales_mkting         719949 non-null float64
scientist            719949 non-null float64

In [119]:
ss = StandardScaler()
X_2_scaled = ss.fit_transform(X_2)
X_2_train_scaled = ss.fit_transform(X_2_train)
X_3_train_scaled = ss.fit_transform(X_3_train)
X_3_scaled = ss.fit_transform(X_3)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [120]:
model = Sequential()
inputs = X_2_scaled.shape[1]
hiddens = inputs
model.add(Dense(hiddens, input_dim=inputs, activation='relu'))
model.add(Dense(1))
adam=Adam()

In [122]:
model.compile(optimizer=adam, loss='mean_squared_error')

In [124]:
model.fit(X_2_train_scaled, y_2_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1241952b0>