diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 8818016..7095e03 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -44,7 +44,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "diabetes = datasets.load_diabetes()"
]
},
{
@@ -56,22 +56,22 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
"dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
]
},
- "execution_count": 3,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 4
}
],
"source": [
- "# your code here\n"
+ "diabetes.keys()"
]
},
{
@@ -87,58 +87,21 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {
"scrolled": false
},
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
- ".. _diabetes_dataset:\n",
- "\n",
- "Diabetes dataset\n",
- "----------------\n",
- "\n",
- "Ten baseline variables, age, sex, body mass index, average blood\n",
- "pressure, and six blood serum measurements were obtained for each of n =\n",
- "442 diabetes patients, as well as the response of interest, a\n",
- "quantitative measure of disease progression one year after baseline.\n",
- "\n",
- "**Data Set Characteristics:**\n",
- "\n",
- " :Number of Instances: 442\n",
- "\n",
- " :Number of Attributes: First 10 columns are numeric predictive values\n",
- "\n",
- " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
- "\n",
- " :Attribute Information:\n",
- " - age age in years\n",
- " - sex\n",
- " - bmi body mass index\n",
- " - bp average blood pressure\n",
- " - s1 tc, T-Cells (a type of white blood cells)\n",
- " - s2 ldl, low-density lipoproteins\n",
- " - s3 hdl, high-density lipoproteins\n",
- " - s4 tch, thyroid stimulating hormone\n",
- " - s5 ltg, lamotrigine\n",
- " - s6 glu, blood sugar level\n",
- "\n",
- "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
- "\n",
- "Source URL:\n",
- "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
- "\n",
- "For more information see:\n",
- "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
- "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+ ".. _diabetes_dataset:\n\nDiabetes dataset\n----------------\n\nTen baseline variables, age, sex, body mass index, average blood\npressure, and six blood serum measurements were obtained for each of n =\n442 diabetes patients, as well as the response of interest, a\nquantitative measure of disease progression one year after baseline.\n\n**Data Set Characteristics:**\n\n :Number of Instances: 442\n\n :Number of Attributes: First 10 columns are numeric predictive values\n\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n\n :Attribute Information:\n - age age in years\n - sex\n - bmi body mass index\n - bp average blood pressure\n - s1 tc, T-Cells (a type of white blood cells)\n - s2 ldl, low-density lipoproteins\n - s3 hdl, high-density lipoproteins\n - s4 tch, thyroid stimulating hormone\n - s5 ltg, lamotrigine\n - s6 glu, blood sugar level\n\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n\nSource URL:\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n\nFor more information see:\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
]
}
],
"source": [
- "# your code here\n"
+ "print(diabetes.DESCR)"
]
},
{
@@ -160,7 +123,9 @@
"metadata": {},
"outputs": [],
"source": [
- "# your answer here \n"
+ "# 1. there are 10 attributes: age, sex, body mass index, average blood pressure and six blood serum measurements\n",
+ "# 2. data is baseline, target is progression after one year\n",
+ "# 3. Number of Instances: 442"
]
},
{
@@ -178,25 +143,18 @@
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Shape of 'data' :\n"
- ]
- },
- {
+ "output_type": "execute_result",
"data": {
"text/plain": [
"(442, 10)"
]
},
- "execution_count": 6,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 6
}
],
"source": [
- "# your code here\n"
+ "diabetes['data'].shape"
]
},
{
@@ -205,24 +163,19 @@
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Shape of 'target' :\n"
- ]
- },
- {
+ "output_type": "execute_result",
"data": {
"text/plain": [
"(442,)"
]
},
- "execution_count": 7,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 7
}
],
- "source": []
+ "source": [
+ "diabetes['target'].shape"
+ ]
},
{
"cell_type": "markdown",
@@ -262,7 +215,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "from sklearn.linear_model import LinearRegression\n"
]
},
{
@@ -278,7 +231,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "diabetes_model = LinearRegression()"
]
},
{
@@ -296,7 +249,10 @@
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "diabetes_data_train = diabetes['data'][:-20]  # everything except the last 20 records\n",
+ "diabetes_data_test = diabetes['data'][-20:]  # last 20 records held out for testing\n",
+ "diabetes_target_train = diabetes['target'][:-20]\n",
+ "diabetes_target_test = diabetes['target'][-20:]"
]
},
{
@@ -308,55 +264,65 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
"LinearRegression()"
]
},
- "execution_count": 14,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 11
}
],
"source": [
- "# your code here\n"
+ "diabetes_model.fit(diabetes_data_train, diabetes_target_train)"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Intercept: 152.76430691633442\n"
- ]
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "152.718929651221"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
}
],
- "source": []
+ "source": [
+ "diabetes_model.intercept_"
+ ]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Coefficients: [ 3.03499549e-01 -2.37639315e+02 5.10530605e+02 3.27736980e+02\n",
- " -8.14131709e+02 4.92814588e+02 1.02848452e+02 1.84606489e+02\n",
- " 7.43519617e+02 7.60951722e+01]\n"
- ]
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([-1.81774793e-01, -2.38292415e+02, 5.10802601e+02, 3.27726454e+02,\n",
+ " -8.09873327e+02, 4.89841345e+02, 1.00217410e+02, 1.81974785e+02,\n",
+ " 7.41715416e+02, 7.66594057e+01])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
}
],
- "source": []
+ "source": [
+ "diabetes_model.coef_"
+ ]
},
{
"cell_type": "markdown",
@@ -376,11 +342,27 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([197.59839294, 155.14324745, 172.75435806, 111.60251429,\n",
+ " 164.65611632, 131.10577235, 259.0612676 , 100.4923271 ,\n",
+ " 116.98458327, 124.2054576 , 218.37929583, 61.18798499,\n",
+ " 132.34126512, 120.33334972, 52.61053967, 193.95740798,\n",
+ " 102.50966048, 123.53318546, 210.98571577, 52.52007012])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ],
"source": [
- "# your code here\n"
+ "y_pred = diabetes_model.predict(diabetes_data_test)\n",
+ "y_pred"
]
},
{
@@ -390,6 +372,27 @@
"#### Print your `diabetes_target_test` and compare with the prediction. "
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([233., 91., 111., 152., 120., 67., 310., 94., 183., 66., 173.,\n",
+ " 72., 49., 64., 48., 178., 104., 132., 220., 57.])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ],
+ "source": [
+ "diabetes_target_test"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 23,
@@ -460,7 +463,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# your answer here \n"
+ "# they are not the same, but similar: the model was fitted on the train data only, so its predictions for the unseen test data should not be expected to match the true targets exactly"
]
},
{
@@ -587,11 +590,11 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "auto = pd.read_csv('../data/auto-mpg.csv')"
]
},
{
@@ -603,100 +606,12 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " mpg | \n",
- " cylinders | \n",
- " displacement | \n",
- " horse_power | \n",
- " weight | \n",
- " acceleration | \n",
- " model_year | \n",
- " car_name | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 18.0 | \n",
- " 8 | \n",
- " 307.0 | \n",
- " 130.0 | \n",
- " 3504 | \n",
- " 12.0 | \n",
- " 70 | \n",
- " \\t\"chevrolet chevelle malibu\" | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 15.0 | \n",
- " 8 | \n",
- " 350.0 | \n",
- " 165.0 | \n",
- " 3693 | \n",
- " 11.5 | \n",
- " 70 | \n",
- " \\t\"buick skylark 320\" | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 18.0 | \n",
- " 8 | \n",
- " 318.0 | \n",
- " 150.0 | \n",
- " 3436 | \n",
- " 11.0 | \n",
- " 70 | \n",
- " \\t\"plymouth satellite\" | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 16.0 | \n",
- " 8 | \n",
- " 304.0 | \n",
- " 150.0 | \n",
- " 3433 | \n",
- " 12.0 | \n",
- " 70 | \n",
- " \\t\"amc rebel sst\" | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 17.0 | \n",
- " 8 | \n",
- " 302.0 | \n",
- " 140.0 | \n",
- " 3449 | \n",
- " 10.5 | \n",
- " 70 | \n",
- " \\t\"ford torino\" | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
"text/plain": [
" mpg cylinders displacement horse_power weight acceleration \\\n",
"0 18.0 8 307.0 130.0 3504 12.0 \n",
@@ -711,15 +626,15 @@
"2 70 \\t\"plymouth satellite\" \n",
"3 70 \\t\"amc rebel sst\" \n",
"4 70 \\t\"ford torino\" "
- ]
+ ],
+ "text/html": "\n\n
\n \n \n | \n mpg | \n cylinders | \n displacement | \n horse_power | \n weight | \n acceleration | \n model_year | \n car_name | \n
\n \n \n \n | 0 | \n 18.0 | \n 8 | \n 307.0 | \n 130.0 | \n 3504 | \n 12.0 | \n 70 | \n \\t\"chevrolet chevelle malibu\" | \n
\n \n | 1 | \n 15.0 | \n 8 | \n 350.0 | \n 165.0 | \n 3693 | \n 11.5 | \n 70 | \n \\t\"buick skylark 320\" | \n
\n \n | 2 | \n 18.0 | \n 8 | \n 318.0 | \n 150.0 | \n 3436 | \n 11.0 | \n 70 | \n \\t\"plymouth satellite\" | \n
\n \n | 3 | \n 16.0 | \n 8 | \n 304.0 | \n 150.0 | \n 3433 | \n 12.0 | \n 70 | \n \\t\"amc rebel sst\" | \n
\n \n | 4 | \n 17.0 | \n 8 | \n 302.0 | \n 140.0 | \n 3449 | \n 10.5 | \n 70 | \n \\t\"ford torino\" | \n
\n \n
\n
"
},
- "execution_count": 27,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 32
}
],
"source": [
- "# your code here\n"
+ "auto.head()"
]
},
{
@@ -731,33 +646,19 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
- "\n",
- "RangeIndex: 398 entries, 0 to 397\n",
- "Data columns (total 8 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 mpg 398 non-null float64\n",
- " 1 cylinders 398 non-null int64 \n",
- " 2 displacement 398 non-null float64\n",
- " 3 horse_power 392 non-null float64\n",
- " 4 weight 398 non-null int64 \n",
- " 5 acceleration 398 non-null float64\n",
- " 6 model_year 398 non-null int64 \n",
- " 7 car_name 398 non-null object \n",
- "dtypes: float64(4), int64(3), object(1)\n",
- "memory usage: 25.0+ KB\n"
+ "\nRangeIndex: 398 entries, 0 to 397\nData columns (total 8 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 mpg 398 non-null float64\n 1 cylinders 398 non-null int64 \n 2 displacement 398 non-null float64\n 3 horse_power 392 non-null float64\n 4 weight 398 non-null int64 \n 5 acceleration 398 non-null float64\n 6 model_year 398 non-null int64 \n 7 car_name 398 non-null object \ndtypes: float64(4), int64(3), object(1)\nmemory usage: 25.0+ KB\n"
]
}
],
"source": [
- "# your code here\n"
+ "auto.info()\n"
]
},
{
@@ -769,43 +670,44 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 34,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "70"
+ "82"
]
},
- "execution_count": 24,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 34
}
],
"source": [
- "# your code here\n",
- "# OLDEST MODEL\n"
+ "#Newest model year\n",
+ "auto[\"model_year\"].max()"
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "82"
+ "70"
]
},
- "execution_count": 25,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 35
}
],
"source": [
- "# NEWEST MODEL \n"
+ "#oldest model year\n",
+ "auto[\"model_year\"].min()"
]
},
{
@@ -817,11 +719,67 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "mpg 0\n",
+ "cylinders 0\n",
+ "displacement 0\n",
+ "horse_power 6\n",
+ "weight 0\n",
+ "acceleration 0\n",
+ "model_year 0\n",
+ "car_name 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 36
+ }
+ ],
+ "source": [
+ "auto.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "auto = auto.dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "mpg 0\n",
+ "cylinders 0\n",
+ "displacement 0\n",
+ "horse_power 0\n",
+ "weight 0\n",
+ "acceleration 0\n",
+ "model_year 0\n",
+ "car_name 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 38
+ }
+ ],
+ "source": [
+ "auto.isna().sum()"
]
},
{
@@ -833,10 +791,11 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
"4 199\n",
@@ -847,13 +806,13 @@
"Name: cylinders, dtype: int64"
]
},
- "execution_count": 29,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 39
}
],
"source": [
- "# your code here \n"
+ "# there are 5 possible cylinders\n",
+ "auto['cylinders'].value_counts()"
]
},
{
@@ -869,11 +828,58 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "auto = auto.drop(columns=['car_name'], errors='ignore')  # non-mutating and safe to re-run\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " mpg cylinders displacement horse_power weight acceleration \\\n",
+ "0 18.0 8 307.0 130.0 3504 12.0 \n",
+ "1 15.0 8 350.0 165.0 3693 11.5 \n",
+ "2 18.0 8 318.0 150.0 3436 11.0 \n",
+ "3 16.0 8 304.0 150.0 3433 12.0 \n",
+ "4 17.0 8 302.0 140.0 3449 10.5 \n",
+ ".. ... ... ... ... ... ... \n",
+ "393 27.0 4 140.0 86.0 2790 15.6 \n",
+ "394 44.0 4 97.0 52.0 2130 24.6 \n",
+ "395 32.0 4 135.0 84.0 2295 11.6 \n",
+ "396 28.0 4 120.0 79.0 2625 18.6 \n",
+ "397 31.0 4 119.0 82.0 2720 19.4 \n",
+ "\n",
+ " model_year \n",
+ "0 70 \n",
+ "1 70 \n",
+ "2 70 \n",
+ "3 70 \n",
+ "4 70 \n",
+ ".. ... \n",
+ "393 82 \n",
+ "394 82 \n",
+ "395 82 \n",
+ "396 82 \n",
+ "397 82 \n",
+ "\n",
+ "[392 rows x 7 columns]"
+ ],
+ "text/html": "\n\n
\n \n \n | \n mpg | \n cylinders | \n displacement | \n horse_power | \n weight | \n acceleration | \n model_year | \n
\n \n \n \n | 0 | \n 18.0 | \n 8 | \n 307.0 | \n 130.0 | \n 3504 | \n 12.0 | \n 70 | \n
\n \n | 1 | \n 15.0 | \n 8 | \n 350.0 | \n 165.0 | \n 3693 | \n 11.5 | \n 70 | \n
\n \n | 2 | \n 18.0 | \n 8 | \n 318.0 | \n 150.0 | \n 3436 | \n 11.0 | \n 70 | \n
\n \n | 3 | \n 16.0 | \n 8 | \n 304.0 | \n 150.0 | \n 3433 | \n 12.0 | \n 70 | \n
\n \n | 4 | \n 17.0 | \n 8 | \n 302.0 | \n 140.0 | \n 3449 | \n 10.5 | \n 70 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 393 | \n 27.0 | \n 4 | \n 140.0 | \n 86.0 | \n 2790 | \n 15.6 | \n 82 | \n
\n \n | 394 | \n 44.0 | \n 4 | \n 97.0 | \n 52.0 | \n 2130 | \n 24.6 | \n 82 | \n
\n \n | 395 | \n 32.0 | \n 4 | \n 135.0 | \n 84.0 | \n 2295 | \n 11.6 | \n 82 | \n
\n \n | 396 | \n 28.0 | \n 4 | \n 120.0 | \n 79.0 | \n 2625 | \n 18.6 | \n 82 | \n
\n \n | 397 | \n 31.0 | \n 4 | \n 119.0 | \n 82.0 | \n 2720 | \n 19.4 | \n 82 | \n
\n \n
\n
392 rows × 7 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 43
+ }
+ ],
+ "source": [
+ "auto"
]
},
{
@@ -887,11 +893,59 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X = auto.drop('mpg', axis=1)\n",
+ "y = auto['mpg']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size= 0.8, random_state = 365)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "auto_model = LinearRegression()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 48
+ }
+ ],
+ "source": [
+ "auto_model.fit(X_train, y_train)\n"
]
},
{
@@ -921,22 +975,40 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_pred = auto_model.predict(X_train)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.metrics import r2_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "0.8198690008457218"
+ "0.7747869393186666"
]
},
- "execution_count": 38,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 51
}
],
"source": [
- "# your code here\n"
+ "r2_score(y_train, y_pred)  # TRAIN; r2_score expects (y_true, y_pred)"
]
},
{
@@ -952,22 +1024,31 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_test_pred = auto_model.predict(X_test)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "0.7507754274816084"
+ "0.7511712270570078"
]
},
- "execution_count": 39,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 53
}
],
"source": [
- "# your code here\n"
+ "r2_score(y_test, y_test_pred)  # TEST; r2_score expects (y_true, y_pred)\n"
]
},
{
@@ -985,7 +1066,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# your answer here\n"
+ "# the r2 score of the training set is slightly higher than the test set, some improvement is possible but still a good result"
]
},
{
@@ -1001,11 +1082,21 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "X = auto.drop('mpg', axis=1)\n",
+ "y = auto['mpg']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train09, X_test09, y_train09, y_test09 = train_test_split(X, y, random_state = 34, train_size= 0.9)\n"
]
},
{
@@ -1017,11 +1108,31 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
- "# your code here\n"
+ "auto_model09 = LinearRegression()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 57
+ }
+ ],
+ "source": [
+ "auto_model09.fit(X_train09, y_train09)"
]
},
{
@@ -1033,24 +1144,40 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_pred09 = auto_model09.predict(X_train09) #TRAIN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "0.8109551916128583"
+ "0.7672121730199706"
]
},
- "execution_count": 39,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 59
}
],
"source": [
- "# your code here\n"
+ "r2_score(y_train09, y_pred09)  # TRAIN; r2_score expects (y_true, y_pred)\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -1060,22 +1187,40 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_test_pred09 = auto_model09.predict(X_test09) #TEST"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "0.7913151386161112"
+ "0.6851313131247594"
]
},
- "execution_count": 40,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 61
}
],
"source": [
- "# your code here\n"
+ "r2_score(y_test09, y_test_pred09)  # TEST; r2_score expects (y_true, y_pred)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# the train set score is roughly unchanged (slightly lower than before), while the test set score is worse than before"
]
},
{
@@ -1203,9 +1348,8 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
+ "name": "python3",
+ "display_name": "Python 3.8.8 64-bit ('base': conda)"
},
"language_info": {
"codemirror_mode": {
@@ -1217,9 +1361,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.3"
+ "version": "3.8.8"
+ },
+ "interpreter": {
+ "hash": "05b3b62c7789db94d00c942f2499dd9a2876ea971fd9b7cf25c50892de6c631c"
}
},
"nbformat": 4,
"nbformat_minor": 2
-}
+}
\ No newline at end of file