diff --git a/your-code/.gitignore b/your-code/.gitignore new file mode 100644 index 0000000..0b0271f --- /dev/null +++ b/your-code/.gitignore @@ -0,0 +1,480 @@ + +# Created by https://www.gitignore.io/api/macos,pycharm,visualstudio,jupyternotebook,visualstudiocode +# Edit at https://www.gitignore.io/?templates=macos,pycharm,visualstudio,jupyternotebook,visualstudiocode + +### JupyterNotebook ### +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ +# + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/modules.xml +# .idea/*.iml +# .idea/modules + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# JetBrains templates +**___jb_tmp___ + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +.idea/sonarlint + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* 
+.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- Backup*.rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# End of https://www.gitignore.io/api/macos,pycharm,visualstudio,jupyternotebook,visualstudiocode \ No newline at end of file diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8818016..d8be5d6 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,14 +12,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 690, "metadata": {}, "outputs": [], "source": [ "# Libraries\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn import datasets\n" + "from sklearn import datasets" ] }, { @@ -40,11 +40,99 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 691, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "#pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 692, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "{'data': array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n", + " 0.01990842, -0.01764613],\n", + " [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n", + " -0.06832974, -0.09220405],\n", + " [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n", + " 0.00286377, -0.02593034],\n", + " ...,\n", + " [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n", + " -0.04687948, 0.01549073],\n", + " [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n", + " 0.04452837, -0.02593034],\n", + " [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n", + " -0.00421986, 0.00306441]]),\n", + " 'target': array([151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101.,\n", + " 69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49.,\n", + " 68., 245., 184., 202., 137., 85., 131., 283., 129., 59., 341.,\n", + " 87., 65., 102., 265., 276., 252., 90., 100., 55., 61., 92.,\n", + " 259., 53., 190., 142., 75., 142., 155., 225., 59., 104., 182.,\n", + " 128., 52., 37., 170., 170., 61., 144., 52., 128., 71., 163.,\n", + " 150., 97., 160., 178., 48., 270., 202., 111., 85., 42., 170.,\n", + " 200., 252., 113., 143., 51., 52., 210., 65., 141., 55., 134.,\n", + " 42., 111., 98., 164., 48., 96., 90., 162., 150., 279., 92.,\n", + " 83., 128., 102., 302., 198., 95., 53., 134., 144., 232., 81.,\n", + " 104., 59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n", + " 173., 180., 84., 121., 161., 99., 109., 115., 268., 274., 158.,\n", + " 107., 83., 103., 272., 85., 280., 336., 281., 118., 317., 235.,\n", + " 60., 174., 259., 178., 128., 96., 126., 288., 88., 292., 71.,\n", + " 197., 186., 25., 84., 96., 195., 53., 217., 172., 131., 214.,\n", + " 59., 70., 220., 268., 152., 47., 74., 295., 101., 151., 127.,\n", + " 237., 225., 81., 151., 107., 64., 138., 185., 265., 101., 137.,\n", + " 143., 141., 79., 292., 178., 91., 116., 86., 122., 72., 129.,\n", + " 142., 90., 158., 39., 196., 222., 277., 99., 196., 202., 155.,\n", + " 
77., 191., 70., 73., 49., 65., 263., 248., 296., 214., 185.,\n", + " 78., 93., 252., 150., 77., 208., 77., 108., 160., 53., 220.,\n", + " 154., 259., 90., 246., 124., 67., 72., 257., 262., 275., 177.,\n", + " 71., 47., 187., 125., 78., 51., 258., 215., 303., 243., 91.,\n", + " 150., 310., 153., 346., 63., 89., 50., 39., 103., 308., 116.,\n", + " 145., 74., 45., 115., 264., 87., 202., 127., 182., 241., 66.,\n", + " 94., 283., 64., 102., 200., 265., 94., 230., 181., 156., 233.,\n", + " 60., 219., 80., 68., 332., 248., 84., 200., 55., 85., 89.,\n", + " 31., 129., 83., 275., 65., 198., 236., 253., 124., 44., 172.,\n", + " 114., 142., 109., 180., 144., 163., 147., 97., 220., 190., 109.,\n", + " 191., 122., 230., 242., 248., 249., 192., 131., 237., 78., 135.,\n", + " 244., 199., 270., 164., 72., 96., 306., 91., 214., 95., 216.,\n", + " 263., 178., 113., 200., 139., 139., 88., 148., 88., 243., 71.,\n", + " 77., 109., 272., 60., 54., 221., 90., 311., 281., 182., 321.,\n", + " 58., 262., 206., 233., 242., 123., 167., 63., 197., 71., 168.,\n", + " 140., 217., 121., 235., 245., 40., 52., 104., 132., 88., 69.,\n", + " 219., 72., 201., 110., 51., 277., 63., 118., 69., 273., 258.,\n", + " 43., 198., 242., 232., 175., 93., 168., 275., 293., 281., 72.,\n", + " 140., 189., 181., 209., 136., 261., 113., 131., 174., 257., 55.,\n", + " 84., 42., 146., 212., 233., 91., 111., 152., 120., 67., 310.,\n", + " 94., 183., 66., 173., 72., 49., 64., 48., 178., 104., 132.,\n", + " 220., 57.]),\n", + " 'frame': None,\n", + " 'DESCR': '.. 
_diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 442\\n\\n :Number of Attributes: First 10 columns are numeric predictive values\\n\\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n :Attribute Information:\\n - age age in years\\n - sex\\n - bmi body mass index\\n - bp average blood pressure\\n - s1 tc, total serum cholesterol\\n - s2 ldl, low-density lipoproteins\\n - s3 hdl, high-density lipoproteins\\n - s4 tch, total cholesterol / HDL\\n - s5 ltg, possibly log of serum triglycerides level\\n - s6 glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. 
the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)',\n", + " 'feature_names': ['age',\n", + " 'sex',\n", + " 'bmi',\n", + " 'bp',\n", + " 's1',\n", + " 's2',\n", + " 's3',\n", + " 's4',\n", + " 's5',\n", + " 's6'],\n", + " 'data_filename': '/Users/ayubpathan/opt/anaconda3/lib/python3.8/site-packages/sklearn/datasets/data/diabetes_data.csv.gz',\n", + " 'target_filename': '/Users/ayubpathan/opt/anaconda3/lib/python3.8/site-packages/sklearn/datasets/data/diabetes_target.csv.gz'}" + ] + }, + "execution_count": 692, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diabetes = datasets.load_diabetes()\n", + "diabetes" ] }, { @@ -56,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 693, "metadata": {}, "outputs": [ { @@ -65,13 +153,14 @@ "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])" ] }, - "execution_count": 3, + "execution_count": 693, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "diabetes.keys()" ] }, { @@ -87,10 +176,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "scrolled": false - }, + "execution_count": 694, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -119,11 +206,11 @@ " - sex\n", " - bmi body mass index\n", " - bp average blood pressure\n", - " - s1 tc, T-Cells (a type of white blood cells)\n", + " - s1 tc, total serum cholesterol\n", " - s2 ldl, low-density lipoproteins\n", " - s3 hdl, high-density lipoproteins\n", - " - s4 tch, thyroid stimulating hormone\n", - " - s5 ltg, lamotrigine\n", + " - s4 tch, total cholesterol / 
HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", " - s6 glu, blood sugar level\n", "\n", "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n", @@ -138,7 +225,8 @@ } ], "source": [ - "# your code here\n" + "# your code here\n", + "print(diabetes.DESCR)" ] }, { @@ -155,12 +243,31 @@ ] }, { - "cell_type": "code", - "execution_count": 5, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# your answer here \n" + "#### Your answer here \n", + "1. How many attributes are there in the data? What do they mean?\n", + " - There are 10 attributes in total:\n", + " - age age in years\n", + " - sex gender of the patient\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, total serum cholesterol\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, total cholesterol / HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", + " - s6 glu, blood sugar level\n", + " \n", + "2. What is the relation between diabetes['data'] and diabetes['target']?\n", + " - diabetes['data'] holds the 10 attributes recorded for each patient (age, sex and the\n", + " other 8 attributes described in question 1), and diabetes['target'] holds the measure of disease \n", + " progression for the same patient, which is what the 10 attributes are used to predict. In short, \n", + " target is the response recorded one year after baseline.\n", + "\n", + "3. 
How many records are there in the data?\n", + " - There are 442 instances.\n" ] }, { @@ -174,55 +281,44 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 695, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of 'data' :\n" - ] - }, { "data": { "text/plain": [ "(442, 10)" ] }, - "execution_count": 6, + "execution_count": 695, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "diabetes['data'].shape" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 696, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of 'target' :\n" - ] - }, { "data": { "text/plain": [ "(442,)" ] }, - "execution_count": 7, + "execution_count": 696, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "diabetes['target'].shape" + ] }, { "cell_type": "markdown", @@ -256,15 +352,6 @@ "#### In the cell below, import the `linear_model` class from `sklearn`. 
" ] }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here\n" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -274,11 +361,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 697, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "from sklearn.linear_model import LinearRegression\n", + "diabetes_model = LinearRegression()" ] }, { @@ -292,11 +381,19 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 698, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "# slicing data with respect to till and from last 20 records\n", + "X = diabetes['data']\n", + "y = diabetes['target']\n", + "\n", + "diabetes_data_train = X[:-20]\n", + "diabetes_target_train = y[:-20]\n", + "diabetes_data_test = X[-20:]\n", + "diabetes_target_test =y[-20:]\n" ] }, { @@ -308,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 699, "metadata": {}, "outputs": [ { @@ -317,46 +414,57 @@ "LinearRegression()" ] }, - "execution_count": 14, + "execution_count": 699, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "diabetes_model.fit(diabetes_data_train, diabetes_target_train)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 700, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Intercept: 152.76430691633442\n" - ] + "data": { + "text/plain": [ + "152.76430691633442" + ] + }, + "execution_count": 700, + "metadata": {}, + "output_type": "execute_result" } ], - "source": [] + "source": [ + "diabetes_model.intercept_" + ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 701, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Coefficients: [ 3.03499549e-01 -2.37639315e+02 
5.10530605e+02 3.27736980e+02\n", - " -8.14131709e+02 4.92814588e+02 1.02848452e+02 1.84606489e+02\n", - " 7.43519617e+02 7.60951722e+01]\n" - ] + "data": { + "text/plain": [ + "array([ 3.03499549e-01, -2.37639315e+02, 5.10530605e+02, 3.27736980e+02,\n", + " -8.14131709e+02, 4.92814588e+02, 1.02848452e+02, 1.84606489e+02,\n", + " 7.43519617e+02, 7.60951722e+01])" + ] + }, + "execution_count": 701, + "metadata": {}, + "output_type": "execute_result" } ], - "source": [] + "source": [ + "diabetes_model.coef_" + ] }, { "cell_type": "markdown", @@ -376,11 +484,27 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 702, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([197.61846908, 155.43979328, 172.88665147, 111.53537279,\n", + " 164.80054784, 131.06954875, 259.12237761, 100.47935157,\n", + " 117.0601052 , 124.30503555, 218.36632793, 61.19831284,\n", + " 132.25046751, 120.3332925 , 52.54458691, 194.03798088,\n", + " 102.57139702, 123.56604987, 211.0346317 , 52.60335674])" + ] + }, + "execution_count": 702, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here\n" + "# your code here\n", + "diabetes_model.predict(diabetes_data_test)" ] }, { @@ -392,75 +516,51 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 703, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "diabetes_target_test:\n" + "diabetes_target_test:\n", + "[233. 91. 111. 152. 120. 67. 310. 94. 183. 66. 173. 72. 49. 64.\n", + " 48. 178. 104. 132. 220. 
57.]\n" ] - }, - { - "data": { - "text/plain": [ - "array([233., 91., 111., 152., 120., 67., 310., 94., 183., 66., 173.,\n", - " 72., 49., 64., 48., 178., 104., 132., 220., 57.])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "print(\"diabetes_target_test:\")\n", + "print(diabetes_target_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Is `diabetes_target_test` exactly the same as the model prediction? Explain." ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 704, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "test prediction:\n" - ] - }, { "data": { "text/plain": [ - "array([197.61846908, 155.43979328, 172.88665147, 111.53537279,\n", - " 164.80054784, 131.06954875, 259.12237761, 100.47935157,\n", - " 117.0601052 , 124.30503555, 218.36632793, 61.19831284,\n", - " 132.25046751, 120.3332925 , 52.54458691, 194.03798088,\n", - " 102.57139702, 123.56604987, 211.0346317 , 52.60335674])" + "'diabetes_target_test and the model prediction are not exactly the same: the predictions are estimates from the fitted linear regression and only approximate the true target values.'" ] }, - "execution_count": 24, + "execution_count": 704, "metadata": {}, "output_type": "execute_result" } ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Is `diabetes_target_test` exactly the same as the model prediction? Explain." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], "source": [ - "# your answer here \n" + "# your answer here \n", + "\"diabetes_target_test and the model prediction are not exactly the same: the predictions are estimates from the fitted linear regression and only approximate the true target values.\"" ] }, { @@ -495,54 +595,59 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 705, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " OLS Regression Results \n", - "==============================================================================\n", - "Dep. Variable: y R-squared: 0.512\n", - "Model: OLS Adj. R-squared: 0.500\n", - "Method: Least Squares F-statistic: 43.16\n", - "Date: Mon, 10 May 2021 Prob (F-statistic): 4.64e-58\n", - "Time: 17:52:26 Log-Likelihood: -2281.1\n", - "No. Observations: 422 AIC: 4584.\n", - "Df Residuals: 411 BIC: 4629.\n", - "Df Model: 10 \n", - "Covariance Type: nonrobust \n", + " OLS Regression Results \n", + "=======================================================================================\n", + "Dep. Variable: y R-squared (uncentered): 0.110\n", + "Model: OLS Adj. R-squared (uncentered): 0.089\n", + "Method: Least Squares F-statistic: 5.109\n", + "Date: Mon, 19 Jul 2021 Prob (F-statistic): 4.77e-07\n", + "Time: 23:27:29 Log-Likelihood: -2745.5\n", + "No. 
Observations: 422 AIC: 5511.\n", + "Df Residuals: 412 BIC: 5552.\n", + "Df Model: 10 \n", + "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", - "x1 0.3035 61.286 0.005 0.996 -120.169 120.776\n", - "x2 -237.6393 62.837 -3.782 0.000 -361.162 -114.117\n", - "x3 510.5306 68.156 7.491 0.000 376.553 644.508\n", - "x4 327.7370 66.876 4.901 0.000 196.275 459.199\n", - "x5 -814.1317 424.044 -1.920 0.056 -1647.697 19.434\n", - "x6 492.8146 344.227 1.432 0.153 -183.850 1169.480\n", - "x7 102.8485 219.463 0.469 0.640 -328.561 534.258\n", - "x8 184.6065 167.336 1.103 0.271 -144.334 513.547\n", - "x9 743.5196 175.359 4.240 0.000 398.807 1088.232\n", - "x10 76.0952 68.293 1.114 0.266 -58.152 210.343\n", - "const 152.7643 2.658 57.469 0.000 147.539 157.990\n", + "x1 42.9190 183.983 0.233 0.816 -318.744 404.582\n", + "x2 -261.9605 188.650 -1.389 0.166 -632.798 108.877\n", + "x3 547.5378 204.613 2.676 0.008 145.322 949.754\n", + "x4 352.4704 200.777 1.756 0.080 -42.205 747.146\n", + "x5 -634.0265 1273.063 -0.498 0.619 -3136.536 1868.483\n", + "x6 285.1002 1033.408 0.276 0.783 -1746.310 2316.510\n", + "x7 -9.4062 658.863 -0.014 0.989 -1304.558 1285.746\n", + "x8 197.4998 502.388 0.393 0.694 -790.064 1185.063\n", + "x9 670.7500 526.463 1.274 0.203 -364.139 1705.639\n", + "x10 11.6643 205.008 0.057 0.955 -391.327 414.656\n", "==============================================================================\n", - "Omnibus: 1.544 Durbin-Watson: 2.026\n", - "Prob(Omnibus): 0.462 Jarque-Bera (JB): 1.421\n", - "Skew: 0.004 Prob(JB): 0.491\n", - "Kurtosis: 2.716 Cond. No. 224.\n", + "Omnibus: 0.574 Durbin-Watson: 0.228\n", + "Prob(Omnibus): 0.751 Jarque-Bera (JB): 0.677\n", + "Skew: -0.001 Prob(JB): 0.713\n", + "Kurtosis: 2.804 Cond. No. 
21.4\n", "==============================================================================\n", "\n", - "Warnings:\n", - "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + "Notes:\n", + "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n", + "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ - "# your code here\n", + "# import libraries\n", "import statsmodels.api as sm\n", - "\n" + "\n", + "# Fit and summarize OLS model\n", + "model = sm.OLS(diabetes_target_train, diabetes_data_train)\n", + "\n", + "result = model.fit()\n", + "print(result.summary())" ] }, { @@ -561,12 +666,31 @@ ] }, { - "cell_type": "code", - "execution_count": 41, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# your answer here\n" + "#### your answer here\n", + "Answer the following questions in the cell below:\n", + "\n", + "What is the F-score of your linear model and is the null hypothesis rejected?\n", + "- P value of F-Statistics is 4.64e-58 which is very close to zero and hence we reject null hypothesis.\n", + "\n", + "Does any of the t-tests of the coefficients produce a confidence interval containing zero? 
What are they?\n", + "- There are several coefficients whose confidence interval contains zero, as follows:\n", + " coeff: x1 which has confidence interval between -120.169 and 120.776\n", + " coeff: x5 which is between -1647.697 and 19.434\n", + " coeff: x6 which is between -183.850 and 1169.480\n", + " coeff: x7 which is between -328.561 and 534.258\n", + " coeff: x8 which is between -144.334 and 513.547\n", + " coeff: x10 which is between -58.152 and 210.343\n", + "\n", + "How will you modify your linear regression model according to the test results above?\n", + " - As we know from the following statements: \n", + " - The t-test on each coefficient checks whether the confidence interval for the variable contains zero. \n", + " - If the confidence interval contains zero, it means the null hypothesis for that variable is not rejected. \n", + " - In other words, this particular variable is not contributing to your linear model and you can remove it from \n", + " your formula.\n", + " - Hence it is better to remove the variables whose coefficients have a confidence interval containing zero and refit the model to improve the test results." 
] }, { @@ -587,11 +711,12 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 706, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "auto = pd.read_csv('/Users/ayubpathan/Desktop/ironhack/week7/lab-supervised-learning-sklearn/data/auto-mpg.csv')" ] }, { @@ -603,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 707, "metadata": {}, "outputs": [ { @@ -713,13 +838,14 @@ "4 70 \\t\"ford torino\" " ] }, - "execution_count": 27, + "execution_count": 707, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "auto.head()" ] }, { @@ -731,7 +857,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 708, "metadata": {}, "outputs": [ { @@ -757,7 +883,8 @@ } ], "source": [ - "# your code here\n" + "# your code here\n", + "auto.info()" ] }, { @@ -769,7 +896,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 709, "metadata": {}, "outputs": [ { @@ -778,19 +905,20 @@ "70" ] }, - "execution_count": 24, + "execution_count": 709, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# your code here\n", - "# OLDEST MODEL\n" + "# OLDEST MODEL\n", + "auto['model_year'].min()" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 710, "metadata": {}, "outputs": [ { @@ -799,13 +927,14 @@ "82" ] }, - "execution_count": 25, + "execution_count": 710, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# NEWEST MODEL \n" + "# NEWEST MODEL \n", + "auto['model_year'].max()" ] }, { @@ -817,11 +946,60 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 711, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 6\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + 
"execution_count": 711, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here\n" + "# your code here\n", + "auto.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 712, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 0\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "execution_count": 712, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto = auto.dropna()\n", + "auto.isna().sum()" ] }, { @@ -833,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 713, "metadata": {}, "outputs": [ { @@ -847,13 +1025,31 @@ "Name: cylinders, dtype: int64" ] }, - "execution_count": 29, + "execution_count": 713, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here \n" + "# your code here \n", + "auto['cylinders'].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 714, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "How many possible values of cylinders are there? - Total 5\n" + ] + } + ], + "source": [ + "print(\"How many possible values of cylinders are there? 
- Total 5\")" ] }, { @@ -869,11 +1065,33 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 715, + "metadata": {}, + "outputs": [], + "source": [ + "auto.drop(['car_name'],axis=1,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 716, + "metadata": {}, + "outputs": [], + "source": [ + "X = auto.drop('mpg', axis=1)\n", + "#y = auto.loc[:].shape\n", + "y = auto['mpg']" + ] + }, + { + "cell_type": "code", + "execution_count": 717, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "from sklearn.model_selection import train_test_split\n", + "X_train,X_test, y_train, y_test = train_test_split(X,y,test_size=0.20, random_state=12)" ] }, { @@ -887,11 +1105,24 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 718, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 718, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here\n" + "# your code here\n", + "auto_model = LinearRegression()\n", + "auto_model.fit(X_train, y_train)" ] }, { @@ -921,22 +1152,33 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 719, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", + "y_pred = auto_model.predict(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 720, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.8198690008457218" + "0.8183286817490285" ] }, - "execution_count": 38, + "execution_count": 720, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "r2_score(y_train, y_pred)" ] }, { @@ -952,22 +1194,24 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 721, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.7507754274816084" + 
"0.7667119615108611" ] }, - "execution_count": 39, + "execution_count": 721, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "y_test_pred = auto_model.predict(X_test)\n", + "r2_score(y_test,y_test_pred)" ] }, { @@ -981,11 +1225,23 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 722, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'R2 score for train set is 0.8183 while R2 score for test set is 0.7667, so we can simply say that our test data was overfitted'" + ] + }, + "execution_count": 722, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your answer here\n" + "# your answer here\n", + "\"R2 score for train set is 0.8183 while R2 score for test set is 0.7667, so we can simply say that our test data was overfitted\"" ] }, { @@ -1001,11 +1257,12 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 723, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "X_train09,X_test09, y_train09, y_test09 = train_test_split(X,y,test_size=0.10, random_state=12)" ] }, { @@ -1017,11 +1274,24 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 724, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 724, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here\n" + "# your code here\n", + "auto_model09 = LinearRegression() \n", + "auto_model09.fit(X_train09, y_train09)" ] }, { @@ -1033,22 +1303,32 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 725, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred09 = auto_model09.predict(X_train09)" + ] + }, + { + "cell_type": "code", + "execution_count": 726, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.8109551916128583" + 
"0.8135037960103886" ] }, - "execution_count": 39, + "execution_count": 726, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "#r squared score:\n", + "r2_score(y_train09, y_pred09)" ] }, { @@ -1060,22 +1340,33 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 727, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.7913151386161112" + "0.7644393459851583" ] }, - "execution_count": 40, + "execution_count": 727, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# your code here\n" + "# your code here\n", + "y_test_pred09 = auto_model09.predict(X_test09)\n", + "r2_score(y_test09,y_test_pred09)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There was no improvement: the previous R squared score on the test set was 0.7667119615108611,\n", + "while the R squared score with the reduced test set is 0.7644393459851583, which is marginally lower and differs only from the third decimal\n", + "place onward compared to the previous one. The difference is too small to be meaningful."
] }, { @@ -1091,12 +1382,13 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 728, "metadata": {}, "outputs": [], "source": [ "# Libraries\n", - "from sklearn.feature_selection import RFE" + "from sklearn.feature_selection import RFE\n", + "from sklearn.svm import SVR" ] }, { @@ -1108,11 +1400,13 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 729, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "estimator = LinearRegression()\n", + "auto_model = RFE(estimator, n_features_to_select=3, step=1)" ] }, { @@ -1124,7 +1418,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 730, "metadata": {}, "outputs": [ { @@ -1133,20 +1427,172 @@ "RFE(estimator=LinearRegression(), n_features_to_select=3)" ] }, - "execution_count": 43, + "execution_count": 730, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "auto_model = auto_model.fit(X,y)\n", + "auto_model" + ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 731, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 4, 3, 1, 1])" + ] + }, + "execution_count": 731, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "auto_model.ranking_" + ] + }, + { + "cell_type": "code", + "execution_count": 732, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['cylinders', 'displacement', 'horse_power', 'weight', 'acceleration',\n", + " 'model_year'],\n", + " dtype='object')" + ] + }, + "execution_count": 732, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here\n" + "X.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 733, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_year
018.08307.0130.0350412.070
115.08350.0165.0369311.570
218.08318.0150.0343611.070
316.08304.0150.0343312.070
417.08302.0140.0344910.570
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "0 18.0 8 307.0 130.0 3504 12.0 \n", + "1 15.0 8 350.0 165.0 3693 11.5 \n", + "2 18.0 8 318.0 150.0 3436 11.0 \n", + "3 16.0 8 304.0 150.0 3433 12.0 \n", + "4 17.0 8 302.0 140.0 3449 10.5 \n", + "\n", + " model_year \n", + "0 70 \n", + "1 70 \n", + "2 70 \n", + "3 70 \n", + "4 70 " + ] + }, + "execution_count": 733, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto.head()" ] }, { @@ -1160,11 +1606,66 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 734, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['cylinders', 'displacement', 'horse_power', 'weight', 'acceleration',\n", + " 'model_year'],\n", + " dtype='object')" + ] + }, + "execution_count": 734, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 735, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "y_RFE_predict = auto_model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 736, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6428672894157043" + ] + }, + "execution_count": 736, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r2_score(y_test,y_RFE_predict)" + ] + }, + { + "cell_type": "code", + "execution_count": 737, + "metadata": {}, + "outputs": [], + "source": [ + "# your code here\n", + "X = auto_model.transform(X)\n", + "y = auto['displacement']\n", + "\n", + "X_train_reduced, X_test_reduced, y_test_reduced, y_train_reduced = train_test_split(X,y,test_size=0.2, random_state=12)\n" ] }, { @@ -1176,11 +1677,15 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 739, "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "auto_model_reduced = LinearRegression() \n", + 
"#auto_model_reduced.fit(X_train_reduced, y_train_reduced)\n", + "#y_RFE_predict_reduced = auto_model.predict(X_train_reduced)\n", + "#r2_score(y_test_reduced,y_RFE_predict_reduced)" ] }, { @@ -1217,7 +1722,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.8.10" } }, "nbformat": 4,