diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8818016..7095e03 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "diabetes = datasets.load_diabetes()" ] }, { @@ -56,22 +56,22 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])" ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], "source": [ - "# your code here\n" + "diabetes.keys()" ] }, { @@ -87,58 +87,21 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - ".. _diabetes_dataset:\n", - "\n", - "Diabetes dataset\n", - "----------------\n", - "\n", - "Ten baseline variables, age, sex, body mass index, average blood\n", - "pressure, and six blood serum measurements were obtained for each of n =\n", - "442 diabetes patients, as well as the response of interest, a\n", - "quantitative measure of disease progression one year after baseline.\n", - "\n", - "**Data Set Characteristics:**\n", - "\n", - " :Number of Instances: 442\n", - "\n", - " :Number of Attributes: First 10 columns are numeric predictive values\n", - "\n", - " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", - "\n", - " :Attribute Information:\n", - " - age age in years\n", - " - sex\n", - " - bmi body mass index\n", - " - bp average blood pressure\n", - " - s1 tc, T-Cells (a type of white blood cells)\n", - " - s2 ldl, low-density lipoproteins\n", - " - s3 hdl, high-density lipoproteins\n", - " - s4 tch, thyroid stimulating hormone\n", - " - s5 ltg, lamotrigine\n", - " - s6 glu, 
blood sugar level\n", - "\n", - "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n", - "\n", - "Source URL:\n", - "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", - "\n", - "For more information see:\n", - "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", - "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n" + ".. _diabetes_dataset:\n\nDiabetes dataset\n----------------\n\nTen baseline variables, age, sex, body mass index, average blood\npressure, and six blood serum measurements were obtained for each of n =\n442 diabetes patients, as well as the response of interest, a\nquantitative measure of disease progression one year after baseline.\n\n**Data Set Characteristics:**\n\n :Number of Instances: 442\n\n :Number of Attributes: First 10 columns are numeric predictive values\n\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n\n :Attribute Information:\n - age age in years\n - sex\n - bmi body mass index\n - bp average blood pressure\n - s1 tc, T-Cells (a type of white blood cells)\n - s2 ldl, low-density lipoproteins\n - s3 hdl, high-density lipoproteins\n - s4 tch, thyroid stimulating hormone\n - s5 ltg, lamotrigine\n - s6 glu, blood sugar level\n\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. 
the sum of squares of each column totals 1).\n\nSource URL:\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n\nFor more information see:\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n" ] } ], "source": [ - "# your code here\n" + "print(diabetes.DESCR)" ] }, { @@ -160,7 +123,9 @@ "metadata": {}, "outputs": [], "source": [ - "# your answer here \n" + "# 1. there are 10 attributes, they describe different body values\n", + "# 2. data is baseline, target is progression after one year\n", + "# 3. Number of Instances: 442" ] }, { @@ -178,25 +143,18 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of 'data' :\n" - ] - }, - { + "output_type": "execute_result", "data": { "text/plain": [ "(442, 10)" ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ - "# your code here\n" + "diabetes['data'].shape" ] }, { @@ -205,24 +163,19 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of 'target' :\n" - ] - }, - { + "output_type": "execute_result", "data": { "text/plain": [ "(442,)" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], - "source": [] + "source": [ + "diabetes['target'].shape" + ] }, { "cell_type": "markdown", @@ -262,7 +215,7 @@ "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "from sklearn.linear_model import LinearRegression\n" ] }, { @@ -278,7 +231,7 @@ "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "diabetes_model = LinearRegression()" ] }, { @@ -296,7 +249,10 @@ "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "diabetes_data_train = diabetes['data'][:-20]\n", + "diabetes_data_test = 
diabetes['data'][-20:]\n", + "diabetes_target_train = diabetes['target'][:-20]\n", + "diabetes_target_test = diabetes['target'][-20:]" ] }, { @@ -308,55 +264,65 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "LinearRegression()" ] }, - "execution_count": 14, "metadata": {}, - "output_type": "execute_result" + "execution_count": 11 } ], "source": [ - "# your code here\n" + "diabetes_model.fit(diabetes_data_train, diabetes_target_train)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Intercept: 152.76430691633442\n" - ] + "output_type": "execute_result", + "data": { + "text/plain": [ + "152.718929651221" + ] + }, + "metadata": {}, + "execution_count": 12 } ], - "source": [] + "source": [ + "diabetes_model.intercept_" + ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Coefficients: [ 3.03499549e-01 -2.37639315e+02 5.10530605e+02 3.27736980e+02\n", - " -8.14131709e+02 4.92814588e+02 1.02848452e+02 1.84606489e+02\n", - " 7.43519617e+02 7.60951722e+01]\n" - ] + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([-1.81774793e-01, -2.38292415e+02, 5.10802601e+02, 3.27726454e+02,\n", + " -8.09873327e+02, 4.89841345e+02, 1.00217410e+02, 1.81974785e+02,\n", + " 7.41715416e+02, 7.66594057e+01])" + ] + }, + "metadata": {}, + "execution_count": 13 } ], - "source": [] + "source": [ + "diabetes_model.coef_" + ] }, { "cell_type": "markdown", @@ -376,11 +342,27 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([197.59839294, 155.14324745, 
172.75435806, 111.60251429,\n", + " 164.65611632, 131.10577235, 259.0612676 , 100.4923271 ,\n", + " 116.98458327, 124.2054576 , 218.37929583, 61.18798499,\n", + " 132.34126512, 120.33334972, 52.61053967, 193.95740798,\n", + " 102.50966048, 123.53318546, 210.98571577, 52.52007012])" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], "source": [ - "# your code here\n" + "y_pred = diabetes_model.predict(diabetes_data_test)\n", + "y_pred" ] }, { @@ -390,6 +372,27 @@ "#### Print your `diabetes_target_test` and compare with the prediction. " ] }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([233., 91., 111., 152., 120., 67., 310., 94., 183., 66., 173.,\n", + " 72., 49., 64., 48., 178., 104., 132., 220., 57.])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "diabetes_target_test" + ] + }, { "cell_type": "code", "execution_count": 23, @@ -460,7 +463,7 @@ "metadata": {}, "outputs": [], "source": [ - "# your answer here \n" + "# they are not same but similar, the train data and test data should not give the same results" ] }, { @@ -587,11 +590,11 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "auto = pd.read_csv('../data/auto-mpg.csv')" ] }, { @@ -603,100 +606,12 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 32, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
018.08307.0130.0350412.070\\t\"chevrolet chevelle malibu\"
115.08350.0165.0369311.570\\t\"buick skylark 320\"
218.08318.0150.0343611.070\\t\"plymouth satellite\"
316.08304.0150.0343312.070\\t\"amc rebel sst\"
417.08302.0140.0344910.570\\t\"ford torino\"
\n", - "
" - ], "text/plain": [ " mpg cylinders displacement horse_power weight acceleration \\\n", "0 18.0 8 307.0 130.0 3504 12.0 \n", @@ -711,15 +626,15 @@ "2 70 \\t\"plymouth satellite\" \n", "3 70 \\t\"amc rebel sst\" \n", "4 70 \\t\"ford torino\" " - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
018.08307.0130.0350412.070\\t\"chevrolet chevelle malibu\"
115.08350.0165.0369311.570\\t\"buick skylark 320\"
218.08318.0150.0343611.070\\t\"plymouth satellite\"
316.08304.0150.0343312.070\\t\"amc rebel sst\"
417.08302.0140.0344910.570\\t\"ford torino\"
\n
" }, - "execution_count": 27, "metadata": {}, - "output_type": "execute_result" + "execution_count": 32 } ], "source": [ - "# your code here\n" + "auto.head()" ] }, { @@ -731,33 +646,19 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 33, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\n", - "RangeIndex: 398 entries, 0 to 397\n", - "Data columns (total 8 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 mpg 398 non-null float64\n", - " 1 cylinders 398 non-null int64 \n", - " 2 displacement 398 non-null float64\n", - " 3 horse_power 392 non-null float64\n", - " 4 weight 398 non-null int64 \n", - " 5 acceleration 398 non-null float64\n", - " 6 model_year 398 non-null int64 \n", - " 7 car_name 398 non-null object \n", - "dtypes: float64(4), int64(3), object(1)\n", - "memory usage: 25.0+ KB\n" + "\nRangeIndex: 398 entries, 0 to 397\nData columns (total 8 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 mpg 398 non-null float64\n 1 cylinders 398 non-null int64 \n 2 displacement 398 non-null float64\n 3 horse_power 392 non-null float64\n 4 weight 398 non-null int64 \n 5 acceleration 398 non-null float64\n 6 model_year 398 non-null int64 \n 7 car_name 398 non-null object \ndtypes: float64(4), int64(3), object(1)\nmemory usage: 25.0+ KB\n" ] } ], "source": [ - "# your code here\n" + "auto.info()\n" ] }, { @@ -769,43 +670,44 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 34, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "70" + "82" ] }, - "execution_count": 24, "metadata": {}, - "output_type": "execute_result" + "execution_count": 34 } ], "source": [ - "# your code here\n", - "# OLDEST MODEL\n" + "#Newest model year\n", + "auto[\"model_year\"].max()" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 35, 
"metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "82" + "70" ] }, - "execution_count": 25, "metadata": {}, - "output_type": "execute_result" + "execution_count": 35 } ], "source": [ - "# NEWEST MODEL \n" + "#oldest model year\n", + "auto[\"model_year\"].min()" ] }, { @@ -817,11 +719,67 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 6\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ], + "source": [ + "auto.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "auto = auto.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 0\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ], + "source": [ + "auto.isna().sum()" ] }, { @@ -833,10 +791,11 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 39, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "4 199\n", @@ -847,13 +806,13 @@ "Name: cylinders, dtype: int64" ] }, - "execution_count": 29, "metadata": {}, - "output_type": "execute_result" + "execution_count": 39 } ], "source": [ - "# your code here \n" + "# there are 5 possible cylinders\n", + "auto['cylinders'].value_counts()" ] }, { @@ -869,11 +828,58 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ - "# your 
code here\n" + "auto.drop(['car_name'], axis = 1, inplace = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "0 18.0 8 307.0 130.0 3504 12.0 \n", + "1 15.0 8 350.0 165.0 3693 11.5 \n", + "2 18.0 8 318.0 150.0 3436 11.0 \n", + "3 16.0 8 304.0 150.0 3433 12.0 \n", + "4 17.0 8 302.0 140.0 3449 10.5 \n", + ".. ... ... ... ... ... ... \n", + "393 27.0 4 140.0 86.0 2790 15.6 \n", + "394 44.0 4 97.0 52.0 2130 24.6 \n", + "395 32.0 4 135.0 84.0 2295 11.6 \n", + "396 28.0 4 120.0 79.0 2625 18.6 \n", + "397 31.0 4 119.0 82.0 2720 19.4 \n", + "\n", + " model_year \n", + "0 70 \n", + "1 70 \n", + "2 70 \n", + "3 70 \n", + "4 70 \n", + ".. ... \n", + "393 82 \n", + "394 82 \n", + "395 82 \n", + "396 82 \n", + "397 82 \n", + "\n", + "[392 rows x 7 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_year
018.08307.0130.0350412.070
115.08350.0165.0369311.570
218.08318.0150.0343611.070
316.08304.0150.0343312.070
417.08302.0140.0344910.570
........................
39327.04140.086.0279015.682
39444.0497.052.0213024.682
39532.04135.084.0229511.682
39628.04120.079.0262518.682
39731.04119.082.0272019.482
\n

392 rows × 7 columns

\n
" + }, + "metadata": {}, + "execution_count": 43 + } + ], + "source": [ + "auto" ] }, { @@ -887,11 +893,59 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "X = auto.drop('mpg', axis=1)\n", + "y = auto['mpg']" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size= 0.8, random_state = 365)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "auto_model = LinearRegression()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ], + "source": [ + "auto_model.fit(X_train, y_train)\n" ] }, { @@ -921,22 +975,40 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = auto_model.predict(X_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 51, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "0.8198690008457218" + "0.7747869393186666" ] }, - "execution_count": 38, "metadata": {}, - "output_type": "execute_result" + "execution_count": 51 } ], "source": [ - "# your code here\n" + "r2_score(y_train, y_pred) #TRAIN" ] }, { @@ -952,22 +1024,31 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 52, + "metadata": {}, + "outputs": 
[], + "source": [ + "y_test_pred = auto_model.predict(X_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 53, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "0.7507754274816084" + "0.7511712270570078" ] }, - "execution_count": 39, "metadata": {}, - "output_type": "execute_result" + "execution_count": 53 } ], "source": [ - "# your code here\n" + "r2_score(y_test, y_test_pred) #TEST\n" ] }, { @@ -985,7 +1066,7 @@ "metadata": {}, "outputs": [], "source": [ - "# your answer here\n" + "# the r2 score of the training set is slightly higher than the test set, some improvement is possible but still a good result" ] }, { @@ -1001,11 +1082,21 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "X = auto.drop('mpg', axis=1)\n", + "y = auto['mpg']" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "X_train09, X_test09, y_train09, y_test09 = train_test_split(X, y, random_state = 34, train_size= 0.9)\n" ] }, { @@ -1017,11 +1108,31 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "auto_model09 = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "metadata": {}, + "execution_count": 57 + } + ], + "source": [ + "auto_model09.fit(X_train09, y_train09)" ] }, { @@ -1033,24 +1144,40 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred09 = auto_model09.predict(X_train09) #TRAIN" + ] + }, + { + "cell_type": "code", + "execution_count": 59, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": 
[ - "0.8109551916128583" + "0.7672121730199706" ] }, - "execution_count": 39, "metadata": {}, - "output_type": "execute_result" + "execution_count": 59 } ], "source": [ - "# your code here\n" + "r2_score(y_train09, y_pred09)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -1060,22 +1187,40 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred09 = auto_model09.predict(X_test09) #TEST" + ] + }, + { + "cell_type": "code", + "execution_count": 61, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "0.7913151386161112" + "0.6851313131247594" ] }, - "execution_count": 40, "metadata": {}, - "output_type": "execute_result" + "execution_count": 61 } ], "source": [ - "# your code here\n" + "r2_score(y_test09, y_test_pred09)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# train test has slightly better result, test set is worse than before" ] }, { @@ -1203,9 +1348,8 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.8.8 64-bit ('base': conda)" }, "language_info": { "codemirror_mode": { @@ -1217,9 +1361,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.8.8" + }, + "interpreter": { + "hash": "05b3b62c7789db94d00c942f2499dd9a2876ea971fd9b7cf25c50892de6c631c" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file