diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 8818016..7095e03 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -44,7 +44,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "diabetes = datasets.load_diabetes()"
    ]
   },
   {
@@ -56,22 +56,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
        "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
       ]
      },
-     "execution_count": 3,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 4
     }
    ],
    "source": [
-    "# your code here\n"
+    "diabetes.keys()"
    ]
   },
   {
@@ -87,58 +87,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {
     "scrolled": false
    },
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
-      ".. _diabetes_dataset:\n",
-      "\n",
-      "Diabetes dataset\n",
-      "----------------\n",
-      "\n",
-      "Ten baseline variables, age, sex, body mass index, average blood\n",
-      "pressure, and six blood serum measurements were obtained for each of n =\n",
-      "442 diabetes patients, as well as the response of interest, a\n",
-      "quantitative measure of disease progression one year after baseline.\n",
-      "\n",
-      "**Data Set Characteristics:**\n",
-      "\n",
-      "  :Number of Instances: 442\n",
-      "\n",
-      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
-      "\n",
-      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
-      "\n",
-      "  :Attribute Information:\n",
-      "      - age     age in years\n",
-      "      - sex\n",
-      "      - bmi     body mass index\n",
-      "      - bp      average blood pressure\n",
-      "      - s1      tc, T-Cells (a type of white blood cells)\n",
-      "      - s2      ldl, low-density lipoproteins\n",
-      "      - s3      hdl, high-density lipoproteins\n",
-      "      - s4      tch, thyroid stimulating hormone\n",
-      "      - s5      ltg, lamotrigine\n",
-      "      - s6      glu, blood sugar level\n",
-      "\n",
-      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
-      "\n",
-      "Source URL:\n",
-      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
-      "\n",
-      "For more information see:\n",
-      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
-      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+      ".. _diabetes_dataset:\n\nDiabetes dataset\n----------------\n\nTen baseline variables, age, sex, body mass index, average blood\npressure, and six blood serum measurements were obtained for each of n =\n442 diabetes patients, as well as the response of interest, a\nquantitative measure of disease progression one year after baseline.\n\n**Data Set Characteristics:**\n\n  :Number of Instances: 442\n\n  :Number of Attributes: First 10 columns are numeric predictive values\n\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n\n  :Attribute Information:\n      - age     age in years\n      - sex\n      - bmi     body mass index\n      - bp      average blood pressure\n      - s1      tc, T-Cells (a type of white blood cells)\n      - s2      ldl, low-density lipoproteins\n      - s3      hdl, high-density lipoproteins\n      - s4      tch, thyroid stimulating hormone\n      - s5      ltg, lamotrigine\n      - s6      glu, blood sugar level\n\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n\nSource URL:\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n\nFor more information see:\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
      ]
     }
    ],
    "source": [
-    "# your code here\n"
+    "print(diabetes.DESCR)"
    ]
   },
   {
@@ -160,7 +123,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your answer here \n"
+    "# 1. there are 10 attributes, they describe different body values\n",
+    "# 2. data is baseline, target is progression after one year\n",
+    "# 3. Number of Instances: 442"
    ]
   },
   {
@@ -178,25 +143,18 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shape of 'data' :\n"
-     ]
-    },
-    {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
        "(442, 10)"
       ]
      },
-     "execution_count": 6,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 6
     }
    ],
    "source": [
-    "# your code here\n"
+    "diabetes['data'].shape"
    ]
   },
   {
@@ -205,24 +163,19 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shape of 'target' :\n"
-     ]
-    },
-    {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
        "(442,)"
       ]
      },
-     "execution_count": 7,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 7
     }
    ],
-   "source": []
+   "source": [
+    "diabetes['target'].shape"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -262,7 +215,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "from sklearn.linear_model import LinearRegression\n"
    ]
   },
   {
@@ -278,7 +231,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -296,7 +249,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "diabetes_data_train = diabetes['data'][:422]  # rows 0-421; stop index matches the test start so no row is skipped\n",
+    "diabetes_data_test = diabetes['data'][422:]  # remaining 20 of the 442 rows\n",
+    "diabetes_target_train = diabetes['target'][:422]  # same row boundary as the feature split\n",
+    "diabetes_target_test = diabetes['target'][422:]"
    ]
   },
   {
@@ -308,55 +264,65 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
        "LinearRegression()"
       ]
      },
-     "execution_count": 14,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 11
     }
    ],
    "source": [
-    "# your code here\n"
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Intercept: 152.76430691633442\n"
-     ]
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "152.718929651221"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 12
     }
    ],
-   "source": []
+   "source": [
+    "diabetes_model.intercept_"
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Coefficients: [ 3.03499549e-01 -2.37639315e+02  5.10530605e+02  3.27736980e+02\n",
-      " -8.14131709e+02  4.92814588e+02  1.02848452e+02  1.84606489e+02\n",
-      "  7.43519617e+02  7.60951722e+01]\n"
-     ]
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([-1.81774793e-01, -2.38292415e+02,  5.10802601e+02,  3.27726454e+02,\n",
+       "       -8.09873327e+02,  4.89841345e+02,  1.00217410e+02,  1.81974785e+02,\n",
+       "        7.41715416e+02,  7.66594057e+01])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 13
     }
    ],
-   "source": []
+   "source": [
+    "diabetes_model.coef_"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -376,11 +342,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([197.59839294, 155.14324745, 172.75435806, 111.60251429,\n",
+       "       164.65611632, 131.10577235, 259.0612676 , 100.4923271 ,\n",
+       "       116.98458327, 124.2054576 , 218.37929583,  61.18798499,\n",
+       "       132.34126512, 120.33334972,  52.61053967, 193.95740798,\n",
+       "       102.50966048, 123.53318546, 210.98571577,  52.52007012])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 14
+    }
+   ],
    "source": [
-    "# your code here\n"
+    "y_pred = diabetes_model.predict(diabetes_data_test)\n",
+    "y_pred"
    ]
   },
   {
@@ -390,6 +372,27 @@
     "#### Print your `diabetes_target_test` and compare with the prediction. "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([233.,  91., 111., 152., 120.,  67., 310.,  94., 183.,  66., 173.,\n",
+       "        72.,  49.,  64.,  48., 178., 104., 132., 220.,  57.])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 15
+    }
+   ],
+   "source": [
+    "diabetes_target_test"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 23,
@@ -460,7 +463,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your answer here \n"
+    "# they are not the same but similar; the training data and the test data should not give the same results"
    ]
   },
   {
@@ -587,11 +590,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "auto = pd.read_csv('../data/auto-mpg.csv')"
    ]
   },
   {
@@ -603,100 +606,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>mpg</th>\n",
-       "      <th>cylinders</th>\n",
-       "      <th>displacement</th>\n",
-       "      <th>horse_power</th>\n",
-       "      <th>weight</th>\n",
-       "      <th>acceleration</th>\n",
-       "      <th>model_year</th>\n",
-       "      <th>car_name</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>18.0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>307.0</td>\n",
-       "      <td>130.0</td>\n",
-       "      <td>3504</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>70</td>\n",
-       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>15.0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>350.0</td>\n",
-       "      <td>165.0</td>\n",
-       "      <td>3693</td>\n",
-       "      <td>11.5</td>\n",
-       "      <td>70</td>\n",
-       "      <td>\\t\"buick skylark 320\"</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>18.0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>318.0</td>\n",
-       "      <td>150.0</td>\n",
-       "      <td>3436</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>70</td>\n",
-       "      <td>\\t\"plymouth satellite\"</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>16.0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>304.0</td>\n",
-       "      <td>150.0</td>\n",
-       "      <td>3433</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>70</td>\n",
-       "      <td>\\t\"amc rebel sst\"</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>17.0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>302.0</td>\n",
-       "      <td>140.0</td>\n",
-       "      <td>3449</td>\n",
-       "      <td>10.5</td>\n",
-       "      <td>70</td>\n",
-       "      <td>\\t\"ford torino\"</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
       "text/plain": [
        "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
        "0  18.0          8         307.0        130.0    3504          12.0   \n",
@@ -711,15 +626,15 @@
        "2          70         \\t\"plymouth satellite\"  \n",
        "3          70              \\t\"amc rebel sst\"  \n",
        "4          70                \\t\"ford torino\"  "
-      ]
+      ],
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>mpg</th>\n      <th>cylinders</th>\n      <th>displacement</th>\n      <th>horse_power</th>\n      <th>weight</th>\n      <th>acceleration</th>\n      <th>model_year</th>\n      <th>car_name</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>18.0</td>\n      <td>8</td>\n      <td>307.0</td>\n      <td>130.0</td>\n      <td>3504</td>\n      <td>12.0</td>\n      <td>70</td>\n      <td>\\t\"chevrolet chevelle malibu\"</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>15.0</td>\n      <td>8</td>\n      <td>350.0</td>\n      <td>165.0</td>\n      <td>3693</td>\n      <td>11.5</td>\n      <td>70</td>\n      <td>\\t\"buick skylark 320\"</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>18.0</td>\n      <td>8</td>\n      <td>318.0</td>\n      <td>150.0</td>\n      <td>3436</td>\n      <td>11.0</td>\n      <td>70</td>\n      <td>\\t\"plymouth satellite\"</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>16.0</td>\n      <td>8</td>\n      <td>304.0</td>\n      <td>150.0</td>\n      <td>3433</td>\n      <td>12.0</td>\n      <td>70</td>\n      <td>\\t\"amc rebel sst\"</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>17.0</td>\n      <td>8</td>\n      <td>302.0</td>\n      <td>140.0</td>\n      <td>3449</td>\n      <td>10.5</td>\n      <td>70</td>\n      <td>\\t\"ford torino\"</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
      },
-     "execution_count": 27,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 32
     }
    ],
    "source": [
-    "# your code here\n"
+    "auto.head()"
    ]
   },
   {
@@ -731,33 +646,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 398 entries, 0 to 397\n",
-      "Data columns (total 8 columns):\n",
-      " #   Column        Non-Null Count  Dtype  \n",
-      "---  ------        --------------  -----  \n",
-      " 0   mpg           398 non-null    float64\n",
-      " 1   cylinders     398 non-null    int64  \n",
-      " 2   displacement  398 non-null    float64\n",
-      " 3   horse_power   392 non-null    float64\n",
-      " 4   weight        398 non-null    int64  \n",
-      " 5   acceleration  398 non-null    float64\n",
-      " 6   model_year    398 non-null    int64  \n",
-      " 7   car_name      398 non-null    object \n",
-      "dtypes: float64(4), int64(3), object(1)\n",
-      "memory usage: 25.0+ KB\n"
+      "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 398 entries, 0 to 397\nData columns (total 8 columns):\n #   Column        Non-Null Count  Dtype  \n---  ------        --------------  -----  \n 0   mpg           398 non-null    float64\n 1   cylinders     398 non-null    int64  \n 2   displacement  398 non-null    float64\n 3   horse_power   392 non-null    float64\n 4   weight        398 non-null    int64  \n 5   acceleration  398 non-null    float64\n 6   model_year    398 non-null    int64  \n 7   car_name      398 non-null    object \ndtypes: float64(4), int64(3), object(1)\nmemory usage: 25.0+ KB\n"
      ]
     }
    ],
    "source": [
-    "# your code here\n"
+    "auto.info()\n"
    ]
   },
   {
@@ -769,43 +670,44 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
-       "70"
+       "82"
       ]
      },
-     "execution_count": 24,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 34
     }
    ],
    "source": [
-    "# your code here\n",
-    "# OLDEST MODEL\n"
+    "#Newest model year\n",
+    "auto[\"model_year\"].max()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
-       "82"
+       "70"
       ]
      },
-     "execution_count": 25,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 35
     }
    ],
    "source": [
-    "# NEWEST MODEL \n"
+    "#oldest model year\n",
+    "auto[\"model_year\"].min()"
    ]
   },
   {
@@ -817,11 +719,67 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     6\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 36
+    }
+   ],
+   "source": [
+    "auto.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "auto = auto.dropna()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     0\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 38
+    }
+   ],
+   "source": [
+    "auto.isna().sum()"
    ]
   },
   {
@@ -833,10 +791,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
        "4    199\n",
@@ -847,13 +806,13 @@
        "Name: cylinders, dtype: int64"
       ]
      },
-     "execution_count": 29,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 39
     }
    ],
    "source": [
-    "# your code here \n"
+    "# there are 5 possible cylinders\n",
+    "auto['cylinders'].value_counts()"
    ]
   },
   {
@@ -869,11 +828,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "auto = auto.drop(columns=['car_name'])  # remove the text (object dtype) column car_name\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "      mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0    18.0          8         307.0        130.0    3504          12.0   \n",
+       "1    15.0          8         350.0        165.0    3693          11.5   \n",
+       "2    18.0          8         318.0        150.0    3436          11.0   \n",
+       "3    16.0          8         304.0        150.0    3433          12.0   \n",
+       "4    17.0          8         302.0        140.0    3449          10.5   \n",
+       "..    ...        ...           ...          ...     ...           ...   \n",
+       "393  27.0          4         140.0         86.0    2790          15.6   \n",
+       "394  44.0          4          97.0         52.0    2130          24.6   \n",
+       "395  32.0          4         135.0         84.0    2295          11.6   \n",
+       "396  28.0          4         120.0         79.0    2625          18.6   \n",
+       "397  31.0          4         119.0         82.0    2720          19.4   \n",
+       "\n",
+       "     model_year  \n",
+       "0            70  \n",
+       "1            70  \n",
+       "2            70  \n",
+       "3            70  \n",
+       "4            70  \n",
+       "..          ...  \n",
+       "393          82  \n",
+       "394          82  \n",
+       "395          82  \n",
+       "396          82  \n",
+       "397          82  \n",
+       "\n",
+       "[392 rows x 7 columns]"
+      ],
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>mpg</th>\n      <th>cylinders</th>\n      <th>displacement</th>\n      <th>horse_power</th>\n      <th>weight</th>\n      <th>acceleration</th>\n      <th>model_year</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>18.0</td>\n      <td>8</td>\n      <td>307.0</td>\n      <td>130.0</td>\n      <td>3504</td>\n      <td>12.0</td>\n      <td>70</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>15.0</td>\n      <td>8</td>\n      <td>350.0</td>\n      <td>165.0</td>\n      <td>3693</td>\n      <td>11.5</td>\n      <td>70</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>18.0</td>\n      <td>8</td>\n      <td>318.0</td>\n      <td>150.0</td>\n      <td>3436</td>\n      <td>11.0</td>\n      <td>70</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>16.0</td>\n      <td>8</td>\n      <td>304.0</td>\n      <td>150.0</td>\n      <td>3433</td>\n      <td>12.0</td>\n      <td>70</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>17.0</td>\n      <td>8</td>\n      <td>302.0</td>\n      <td>140.0</td>\n      <td>3449</td>\n      <td>10.5</td>\n      <td>70</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>393</th>\n      <td>27.0</td>\n      <td>4</td>\n      <td>140.0</td>\n      <td>86.0</td>\n      <td>2790</td>\n      <td>15.6</td>\n      <td>82</td>\n    </tr>\n    <tr>\n      <th>394</th>\n      <td>44.0</td>\n      <td>4</td>\n      <td>97.0</td>\n      <td>52.0</td>\n      
<td>2130</td>\n      <td>24.6</td>\n      <td>82</td>\n    </tr>\n    <tr>\n      <th>395</th>\n      <td>32.0</td>\n      <td>4</td>\n      <td>135.0</td>\n      <td>84.0</td>\n      <td>2295</td>\n      <td>11.6</td>\n      <td>82</td>\n    </tr>\n    <tr>\n      <th>396</th>\n      <td>28.0</td>\n      <td>4</td>\n      <td>120.0</td>\n      <td>79.0</td>\n      <td>2625</td>\n      <td>18.6</td>\n      <td>82</td>\n    </tr>\n    <tr>\n      <th>397</th>\n      <td>31.0</td>\n      <td>4</td>\n      <td>119.0</td>\n      <td>82.0</td>\n      <td>2720</td>\n      <td>19.4</td>\n      <td>82</td>\n    </tr>\n  </tbody>\n</table>\n<p>392 rows × 7 columns</p>\n</div>"
+     },
+     "metadata": {},
+     "execution_count": 43
+    }
+   ],
+   "source": [
+    "auto"
    ]
   },
   {
@@ -887,11 +893,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = auto.drop('mpg', axis=1)\n",
+    "y = auto['mpg']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size= 0.8, random_state = 365)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "auto_model = LinearRegression()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 48
+    }
+   ],
+   "source": [
+    "auto_model.fit(X_train, y_train)\n"
    ]
   },
   {
@@ -921,22 +975,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = auto_model.predict(X_train)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
-       "0.8198690008457218"
+       "0.7747869393186666"
       ]
      },
-     "execution_count": 38,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 51
     }
    ],
    "source": [
-    "# your code here\n"
+    "r2_score(y_train, y_pred)  # TRAIN; argument order is r2_score(y_true, y_pred) and R^2 is not symmetric"
    ]
   },
   {
@@ -952,22 +1024,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_test_pred = auto_model.predict(X_test)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
-       "0.7507754274816084"
+       "0.7511712270570078"
       ]
      },
-     "execution_count": 39,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 53
     }
    ],
    "source": [
-    "# your code here\n"
+    "r2_score(y_test, y_test_pred)  # TEST; argument order is r2_score(y_true, y_pred) and R^2 is not symmetric\n"
    ]
   },
   {
@@ -985,7 +1066,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your answer here\n"
+    "# the r2 score of the training set is slightly higher than the test set, some improvement is possible but still a good result"
    ]
   },
   {
@@ -1001,11 +1082,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 54,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "X = auto.drop('mpg', axis=1)\n",
+    "y = auto['mpg']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(X, y, random_state = 34, train_size= 0.9)\n"
    ]
   },
   {
@@ -1017,11 +1108,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 56,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here\n"
+    "auto_model09 = LinearRegression()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 57
+    }
+   ],
+   "source": [
+    "auto_model09.fit(X_train09, y_train09)"
    ]
   },
   {
@@ -1033,24 +1144,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred09 = auto_model09.predict(X_train09) #TRAIN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
-       "0.8109551916128583"
+       "0.7672121730199706"
       ]
      },
-     "execution_count": 39,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 59
     }
    ],
    "source": [
-    "# your code here\n"
+    "r2_score(y_train09, y_pred09)  # TRAIN, 90/10 split; argument order is r2_score(y_true, y_pred)\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1060,22 +1187,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_test_pred09 = auto_model09.predict(X_test09) #TEST"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
    "metadata": {},
    "outputs": [
     {
+     "output_type": "execute_result",
      "data": {
       "text/plain": [
-       "0.7913151386161112"
+       "0.6851313131247594"
       ]
      },
-     "execution_count": 40,
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 61
     }
    ],
    "source": [
-    "# your code here\n"
+    "r2_score(y_test09, y_test_pred09)  # TEST, 90/10 split; argument order is r2_score(y_true, y_pred)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the training set has a slightly better result, the test set is worse than before"
    ]
   },
   {
@@ -1203,9 +1348,8 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
+   "name": "python3",
+   "display_name": "Python 3.8.8 64-bit ('base': conda)"
   },
   "language_info": {
    "codemirror_mode": {
@@ -1217,9 +1361,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.8.8"
+  },
+  "interpreter": {
+   "hash": "05b3b62c7789db94d00c942f2499dd9a2876ea971fd9b7cf25c50892de6c631c"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file