diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 68b3762..76da4bf 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ - "#Import your libraries\n" + "import pandas as pd\n", + "import datetime as datetime" ] }, { @@ -38,11 +39,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "data = pd.read_csv(\"austin_weather.csv\")\n", + "austin = data.copy()" ] }, { @@ -57,29 +59,379 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(1319, 21)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "austin.shape" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1319 entries, 0 to 1318\n", + "Data columns (total 21 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Date 1319 non-null object\n", + " 1 TempHighF 1319 non-null int64 \n", + " 2 TempAvgF 1319 non-null int64 \n", + " 3 TempLowF 1319 non-null int64 \n", + " 4 DewPointHighF 1319 non-null object\n", + " 5 DewPointAvgF 1319 non-null object\n", + " 6 DewPointLowF 1319 non-null object\n", + " 7 HumidityHighPercent 1319 non-null object\n", + " 8 HumidityAvgPercent 1319 non-null object\n", + " 9 HumidityLowPercent 1319 non-null object\n", + " 10 SeaLevelPressureHighInches 1319 non-null object\n", + " 11 SeaLevelPressureAvgInches 1319 non-null object\n", + " 12 SeaLevelPressureLowInches 1319 non-null object\n", + 
" 13 VisibilityHighMiles 1319 non-null object\n", + " 14 VisibilityAvgMiles 1319 non-null object\n", + " 15 VisibilityLowMiles 1319 non-null object\n", + " 16 WindHighMPH 1319 non-null object\n", + " 17 WindAvgMPH 1319 non-null object\n", + " 18 WindGustMPH 1319 non-null object\n", + " 19 PrecipitationSumInches 1319 non-null object\n", + " 20 Events 1319 non-null object\n", + "dtypes: int64(3), object(18)\n", + "memory usage: 216.5+ KB\n" + ] + } + ], + "source": [ + "austin.info()" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
TempHighF1319.080.86277514.76652332.072.083.092.0107.0
TempAvgF1319.070.64291114.04590429.062.073.083.093.0
TempLowF1319.059.90295714.19064819.049.063.073.081.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% max\n", + "TempHighF 1319.0 80.862775 14.766523 32.0 72.0 83.0 92.0 107.0\n", + "TempAvgF 1319.0 70.642911 14.045904 29.0 62.0 73.0 83.0 93.0\n", + "TempLowF 1319.0 59.902957 14.190648 19.0 49.0 63.0 73.0 81.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin.describe().T" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempAvgFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercent...SeaLevelPressureAvgInchesSeaLevelPressureLowInchesVisibilityHighMilesVisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHPrecipitationSumInchesEvents
02013-12-21746045674943937557...29.6829.591072204310.46Rain , Thunderstorm
12013-12-22564839433628936843...30.1329.8710105166250
22013-12-23584532312723765227...30.4930.4110101083120
32013-12-24614631362821895622...30.4530.310107124200
42013-12-25585041444036867156...30.3330.271010710216T
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempAvgF TempLowF DewPointHighF DewPointAvgF \\\n", + "0 2013-12-21 74 60 45 67 49 \n", + "1 2013-12-22 56 48 39 43 36 \n", + "2 2013-12-23 58 45 32 31 27 \n", + "3 2013-12-24 61 46 31 36 28 \n", + "4 2013-12-25 58 50 41 44 40 \n", + "\n", + " DewPointLowF HumidityHighPercent HumidityAvgPercent HumidityLowPercent ... \\\n", + "0 43 93 75 57 ... \n", + "1 28 93 68 43 ... \n", + "2 23 76 52 27 ... \n", + "3 21 89 56 22 ... \n", + "4 36 86 71 56 ... \n", + "\n", + " SeaLevelPressureAvgInches SeaLevelPressureLowInches VisibilityHighMiles \\\n", + "0 29.68 29.59 10 \n", + "1 30.13 29.87 10 \n", + "2 30.49 30.41 10 \n", + "3 30.45 30.3 10 \n", + "4 30.33 30.27 10 \n", + "\n", + " VisibilityAvgMiles VisibilityLowMiles WindHighMPH WindAvgMPH WindGustMPH \\\n", + "0 7 2 20 4 31 \n", + "1 10 5 16 6 25 \n", + "2 10 10 8 3 12 \n", + "3 10 7 12 4 20 \n", + "4 10 7 10 2 16 \n", + "\n", + " PrecipitationSumInches Events \n", + "0 0.46 Rain , Thunderstorm \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 T \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin.head()" ] }, { @@ -113,20 +465,37 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['49', '36', '27', '28', '40', '39', '41', '26', '42', '22', '48',\n", + " '32', '8', '11', '45', '55', '61', '37', '47', '25', '23', '20',\n", + " '33', '30', '29', '17', '14', '13', '54', '59', '15', '24', '34',\n", + " '35', '57', '50', '53', '60', '46', '56', '51', '31', '38', '62',\n", + " '43', '63', '64', '67', '66', '58', '70', '68', '65', '69', '71',\n", + " '72', '-', '73', '74', '21', '44', '52', '12', '75', '76', '18'],\n", + " dtype=object)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "austin.DewPointAvgF.unique()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# Your observation here\n" + "# All the values except one are integers, but the \"-\" is what make Pandas treat the column as an object" ] }, { @@ -140,7 +509,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -157,7 +526,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "austin[wrong_type_columns] = austin[wrong_type_columns].apply(pd.to_numeric, errors=\"coerce\")" ] }, { @@ -169,11 +538,46 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1319 entries, 0 to 1318\n", + "Data columns (total 21 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Date 1319 non-null object \n", + " 1 TempHighF 1319 non-null int64 \n", + " 2 TempAvgF 1319 non-null int64 \n", + " 3 TempLowF 1319 non-null int64 \n", + " 4 DewPointHighF 1312 non-null float64\n", + " 5 DewPointAvgF 1312 non-null float64\n", + " 6 DewPointLowF 1312 non-null float64\n", + " 7 HumidityHighPercent 1317 non-null float64\n", + " 8 HumidityAvgPercent 1317 non-null float64\n", + " 9 HumidityLowPercent 1317 non-null float64\n", + " 10 SeaLevelPressureHighInches 1316 non-null float64\n", + " 11 SeaLevelPressureAvgInches 1316 non-null float64\n", + " 12 SeaLevelPressureLowInches 1316 non-null float64\n", + " 13 VisibilityHighMiles 1307 non-null float64\n", + " 14 VisibilityAvgMiles 1307 non-null float64\n", + " 15 VisibilityLowMiles 1307 non-null float64\n", + " 16 WindHighMPH 1317 non-null float64\n", + " 17 WindAvgMPH 1317 non-null float64\n", + " 18 
WindGustMPH 1315 non-null float64\n", + " 19 PrecipitationSumInches 1195 non-null float64\n", + " 20 Events 1319 non-null object \n", + "dtypes: float64(16), int64(3), object(2)\n", + "memory usage: 216.5+ KB\n" + ] + } + ], + "source": [ + "austin.info()" ] }, { @@ -200,11 +604,413 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempAvgFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercent...SeaLevelPressureAvgInchesSeaLevelPressureLowInchesVisibilityHighMilesVisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHPrecipitationSumInchesEvents
42013-12-2558504144.040.036.086.071.056.0...30.3330.2710.010.07.010.02.016.0NaN
62013-12-2760534541.039.037.083.065.047.0...30.3930.3410.09.07.07.01.011.0NaN
72013-12-2862514043.039.033.092.064.036.0...30.1730.0410.010.07.010.02.014.0NaN
422014-02-0176665562.059.041.081.071.060.0...29.8129.7510.010.09.014.06.026.0NaNRain
512014-02-1060483549.036.030.082.074.066.0...30.1530.0210.08.04.015.09.023.0NaNRain
..................................................................
12692017-06-1294857572.068.062.087.062.036.0...29.8929.8110.010.02.015.07.022.0NaNRain
12882017-07-0198897976.072.066.085.062.038.0...30.0229.9210.09.06.015.07.024.0NaN
13042017-07-1798887775.071.066.088.063.038.0...29.9029.8410.010.010.016.03.023.0NaNRain , Thunderstorm
13082017-07-21104917774.068.061.085.056.027.0...29.9529.8610.010.010.014.05.024.0NaN
13132017-07-26103917875.070.061.091.058.025.0...30.0329.9310.010.010.015.06.027.0NaN
\n", + "

136 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempAvgF TempLowF DewPointHighF DewPointAvgF \\\n", + "4 2013-12-25 58 50 41 44.0 40.0 \n", + "6 2013-12-27 60 53 45 41.0 39.0 \n", + "7 2013-12-28 62 51 40 43.0 39.0 \n", + "42 2014-02-01 76 66 55 62.0 59.0 \n", + "51 2014-02-10 60 48 35 49.0 36.0 \n", + "... ... ... ... ... ... ... \n", + "1269 2017-06-12 94 85 75 72.0 68.0 \n", + "1288 2017-07-01 98 89 79 76.0 72.0 \n", + "1304 2017-07-17 98 88 77 75.0 71.0 \n", + "1308 2017-07-21 104 91 77 74.0 68.0 \n", + "1313 2017-07-26 103 91 78 75.0 70.0 \n", + "\n", + " DewPointLowF HumidityHighPercent HumidityAvgPercent \\\n", + "4 36.0 86.0 71.0 \n", + "6 37.0 83.0 65.0 \n", + "7 33.0 92.0 64.0 \n", + "42 41.0 81.0 71.0 \n", + "51 30.0 82.0 74.0 \n", + "... ... ... ... \n", + "1269 62.0 87.0 62.0 \n", + "1288 66.0 85.0 62.0 \n", + "1304 66.0 88.0 63.0 \n", + "1308 61.0 85.0 56.0 \n", + "1313 61.0 91.0 58.0 \n", + "\n", + " HumidityLowPercent ... SeaLevelPressureAvgInches \\\n", + "4 56.0 ... 30.33 \n", + "6 47.0 ... 30.39 \n", + "7 36.0 ... 30.17 \n", + "42 60.0 ... 29.81 \n", + "51 66.0 ... 30.15 \n", + "... ... ... ... \n", + "1269 36.0 ... 29.89 \n", + "1288 38.0 ... 30.02 \n", + "1304 38.0 ... 29.90 \n", + "1308 27.0 ... 29.95 \n", + "1313 25.0 ... 30.03 \n", + "\n", + " SeaLevelPressureLowInches VisibilityHighMiles VisibilityAvgMiles \\\n", + "4 30.27 10.0 10.0 \n", + "6 30.34 10.0 9.0 \n", + "7 30.04 10.0 10.0 \n", + "42 29.75 10.0 10.0 \n", + "51 30.02 10.0 8.0 \n", + "... ... ... ... \n", + "1269 29.81 10.0 10.0 \n", + "1288 29.92 10.0 9.0 \n", + "1304 29.84 10.0 10.0 \n", + "1308 29.86 10.0 10.0 \n", + "1313 29.93 10.0 10.0 \n", + "\n", + " VisibilityLowMiles WindHighMPH WindAvgMPH WindGustMPH \\\n", + "4 7.0 10.0 2.0 16.0 \n", + "6 7.0 7.0 1.0 11.0 \n", + "7 7.0 10.0 2.0 14.0 \n", + "42 9.0 14.0 6.0 26.0 \n", + "51 4.0 15.0 9.0 23.0 \n", + "... ... ... ... ... 
\n", + "1269 2.0 15.0 7.0 22.0 \n", + "1288 6.0 15.0 7.0 24.0 \n", + "1304 10.0 16.0 3.0 23.0 \n", + "1308 10.0 14.0 5.0 24.0 \n", + "1313 10.0 15.0 6.0 27.0 \n", + "\n", + " PrecipitationSumInches Events \n", + "4 NaN \n", + "6 NaN \n", + "7 NaN \n", + "42 NaN Rain \n", + "51 NaN Rain \n", + "... ... ... \n", + "1269 NaN Rain \n", + "1288 NaN \n", + "1304 NaN Rain , Thunderstorm \n", + "1308 NaN \n", + "1313 NaN \n", + "\n", + "[136 rows x 21 columns]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "missing_values = austin.loc[austin.isnull().any(axis=1)]\n", + "missing_values" ] }, { @@ -233,11 +1039,124 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1319" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(austin)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Date 0\n", + "TempHighF 0\n", + "TempAvgF 0\n", + "TempLowF 0\n", + "DewPointHighF 7\n", + "DewPointAvgF 7\n", + "DewPointLowF 7\n", + "HumidityHighPercent 2\n", + "HumidityAvgPercent 2\n", + "HumidityLowPercent 2\n", + "SeaLevelPressureHighInches 3\n", + "SeaLevelPressureAvgInches 3\n", + "SeaLevelPressureLowInches 3\n", + "VisibilityHighMiles 12\n", + "VisibilityAvgMiles 12\n", + "VisibilityLowMiles 12\n", + "WindHighMPH 2\n", + "WindAvgMPH 2\n", + "WindGustMPH 4\n", + "PrecipitationSumInches 124\n", + "Events 0\n", + "dtype: int64" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "136" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ - "# Your code here\n" + "len(missing_values)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Date 0\n", + "TempHighF 0\n", + "TempAvgF 0\n", + "TempLowF 0\n", + "DewPointHighF 7\n", + "DewPointAvgF 7\n", + "DewPointLowF 7\n", + "HumidityHighPercent 2\n", + "HumidityAvgPercent 2\n", + "HumidityLowPercent 2\n", + "SeaLevelPressureHighInches 3\n", + "SeaLevelPressureAvgInches 3\n", + "SeaLevelPressureLowInches 3\n", + "VisibilityHighMiles 12\n", + "VisibilityAvgMiles 12\n", + "VisibilityLowMiles 12\n", + "WindHighMPH 2\n", + "WindAvgMPH 2\n", + "WindGustMPH 4\n", + "PrecipitationSumInches 124\n", + "Events 0\n", + "dtype: int64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "missing_values.isna().sum()" ] }, { @@ -249,11 +1168,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 70, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.10310841546626232" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "len(missing_values)/len(austin)\n" ] }, { @@ -267,11 +1197,43 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Date 0\n", + "TempHighF 0\n", + "TempAvgF 0\n", + "TempLowF 0\n", + "DewPointHighF 7\n", + "DewPointAvgF 7\n", + "DewPointLowF 7\n", + "HumidityHighPercent 2\n", + "HumidityAvgPercent 2\n", + "HumidityLowPercent 2\n", + "SeaLevelPressureHighInches 3\n", + "SeaLevelPressureAvgInches 3\n", + "SeaLevelPressureLowInches 3\n", + "VisibilityHighMiles 12\n", + "VisibilityAvgMiles 12\n", + "VisibilityLowMiles 12\n", + "WindHighMPH 2\n", + "WindAvgMPH 2\n", + "WindGustMPH 
4\n", + "PrecipitationSumInches 124\n", + "Events 0\n", + "dtype: int64" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "missing_values.isna().sum()" ] }, { @@ -283,11 +1245,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.09401061410159212" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "austin[\"PrecipitationSumInches\"].isna().sum()/len(austin)\n" ] }, { @@ -309,12 +1282,401 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempAvgFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercentSeaLevelPressureHighInchesSeaLevelPressureAvgInchesSeaLevelPressureLowInchesVisibilityHighMilesVisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHEvents
02013-12-2174604567.049.043.093.075.057.029.8629.6829.5910.07.02.020.04.031.0Rain , Thunderstorm
12013-12-2256483943.036.028.093.068.043.030.4130.1329.8710.010.05.016.06.025.0
22013-12-2358453231.027.023.076.052.027.030.5630.4930.4110.010.010.08.03.012.0
32013-12-2461463136.028.021.089.056.022.030.5630.4530.3010.010.07.012.04.020.0
42013-12-2558504144.040.036.086.071.056.030.4130.3330.2710.010.07.010.02.016.0
...............................................................
13142017-07-27103897571.067.061.082.054.025.030.0429.9729.8810.010.010.012.05.021.0
13152017-07-28105917671.064.055.087.054.020.029.9729.9029.8110.010.010.014.05.020.0
13162017-07-29107927772.064.055.082.051.019.029.9129.8629.7910.010.010.012.04.017.0
13172017-07-30106937970.068.063.069.048.027.029.9629.9129.8710.010.010.013.04.020.0
13182017-07-3199887766.061.054.064.043.022.030.0429.9729.9110.010.010.012.04.020.0
\n", + "

1319 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempAvgF TempLowF DewPointHighF DewPointAvgF \\\n", + "0 2013-12-21 74 60 45 67.0 49.0 \n", + "1 2013-12-22 56 48 39 43.0 36.0 \n", + "2 2013-12-23 58 45 32 31.0 27.0 \n", + "3 2013-12-24 61 46 31 36.0 28.0 \n", + "4 2013-12-25 58 50 41 44.0 40.0 \n", + "... ... ... ... ... ... ... \n", + "1314 2017-07-27 103 89 75 71.0 67.0 \n", + "1315 2017-07-28 105 91 76 71.0 64.0 \n", + "1316 2017-07-29 107 92 77 72.0 64.0 \n", + "1317 2017-07-30 106 93 79 70.0 68.0 \n", + "1318 2017-07-31 99 88 77 66.0 61.0 \n", + "\n", + " DewPointLowF HumidityHighPercent HumidityAvgPercent \\\n", + "0 43.0 93.0 75.0 \n", + "1 28.0 93.0 68.0 \n", + "2 23.0 76.0 52.0 \n", + "3 21.0 89.0 56.0 \n", + "4 36.0 86.0 71.0 \n", + "... ... ... ... \n", + "1314 61.0 82.0 54.0 \n", + "1315 55.0 87.0 54.0 \n", + "1316 55.0 82.0 51.0 \n", + "1317 63.0 69.0 48.0 \n", + "1318 54.0 64.0 43.0 \n", + "\n", + " HumidityLowPercent SeaLevelPressureHighInches \\\n", + "0 57.0 29.86 \n", + "1 43.0 30.41 \n", + "2 27.0 30.56 \n", + "3 22.0 30.56 \n", + "4 56.0 30.41 \n", + "... ... ... \n", + "1314 25.0 30.04 \n", + "1315 20.0 29.97 \n", + "1316 19.0 29.91 \n", + "1317 27.0 29.96 \n", + "1318 22.0 30.04 \n", + "\n", + " SeaLevelPressureAvgInches SeaLevelPressureLowInches \\\n", + "0 29.68 29.59 \n", + "1 30.13 29.87 \n", + "2 30.49 30.41 \n", + "3 30.45 30.30 \n", + "4 30.33 30.27 \n", + "... ... ... \n", + "1314 29.97 29.88 \n", + "1315 29.90 29.81 \n", + "1316 29.86 29.79 \n", + "1317 29.91 29.87 \n", + "1318 29.97 29.91 \n", + "\n", + " VisibilityHighMiles VisibilityAvgMiles VisibilityLowMiles \\\n", + "0 10.0 7.0 2.0 \n", + "1 10.0 10.0 5.0 \n", + "2 10.0 10.0 10.0 \n", + "3 10.0 10.0 7.0 \n", + "4 10.0 10.0 7.0 \n", + "... ... ... ... 
\n", + "1314 10.0 10.0 10.0 \n", + "1315 10.0 10.0 10.0 \n", + "1316 10.0 10.0 10.0 \n", + "1317 10.0 10.0 10.0 \n", + "1318 10.0 10.0 10.0 \n", + "\n", + " WindHighMPH WindAvgMPH WindGustMPH Events \n", + "0 20.0 4.0 31.0 Rain , Thunderstorm \n", + "1 16.0 6.0 25.0 \n", + "2 8.0 3.0 12.0 \n", + "3 12.0 4.0 20.0 \n", + "4 10.0 2.0 16.0 \n", + "... ... ... ... ... \n", + "1314 12.0 5.0 21.0 \n", + "1315 14.0 5.0 20.0 \n", + "1316 12.0 4.0 17.0 \n", + "1317 13.0 4.0 20.0 \n", + "1318 12.0 4.0 20.0 \n", + "\n", + "[1319 rows x 20 columns]" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here \n", - "\n", + "austin.drop(columns=\"PrecipitationSumInches\", inplace=True)\n", "\n", "# Print `austin` to confirm the column is indeed removed\n", "\n", @@ -336,11 +1698,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\FernandoSanz-Extreme\\AppData\\Local\\Temp\\ipykernel_19480\\2951143596.py:2: FutureWarning: DataFrame.interpolate with object dtype is deprecated and will raise in a future version. 
Call obj.infer_objects(copy=False) before interpolating instead.\n", + " austin_fixed = austin.interpolate(inplace=False)\n" + ] + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "austin_fixed = austin.interpolate(inplace=False)" ] }, { @@ -352,11 +1724,82 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Date 0\n", + "TempHighF 0\n", + "TempAvgF 0\n", + "TempLowF 0\n", + "DewPointHighF 0\n", + "DewPointAvgF 0\n", + "DewPointLowF 0\n", + "HumidityHighPercent 0\n", + "HumidityAvgPercent 0\n", + "HumidityLowPercent 0\n", + "SeaLevelPressureHighInches 0\n", + "SeaLevelPressureAvgInches 0\n", + "SeaLevelPressureLowInches 0\n", + "VisibilityHighMiles 0\n", + "VisibilityAvgMiles 0\n", + "VisibilityLowMiles 0\n", + "WindHighMPH 0\n", + "WindAvgMPH 0\n", + "WindGustMPH 0\n", + "Events 0\n", + "dtype: int64" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin_fixed.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Date 0\n", + "TempHighF 0\n", + "TempAvgF 0\n", + "TempLowF 0\n", + "DewPointHighF 7\n", + "DewPointAvgF 7\n", + "DewPointLowF 7\n", + "HumidityHighPercent 2\n", + "HumidityAvgPercent 2\n", + "HumidityLowPercent 2\n", + "SeaLevelPressureHighInches 3\n", + "SeaLevelPressureAvgInches 3\n", + "SeaLevelPressureLowInches 3\n", + "VisibilityHighMiles 12\n", + "VisibilityAvgMiles 12\n", + "VisibilityLowMiles 12\n", + "WindHighMPH 2\n", + "WindAvgMPH 2\n", + "WindGustMPH 4\n", + "Events 0\n", + "dtype: int64" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin.isna().sum()" ] }, { @@ -377,11 +1820,32 @@ }, { "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Events\n", + " 903\n", + "Rain 192\n", + "Rain , Thunderstorm 137\n", + "Fog , Rain , Thunderstorm 33\n", + "Fog 21\n", + "Thunderstorm 17\n", + "Fog , Rain 14\n", + "Rain , Snow 1\n", + "Fog , Thunderstorm 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin_fixed.Events.value_counts()" ] }, { @@ -395,11 +1859,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your answer:\n" + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\FernandoSanz-Extreme\\AppData\\Local\\Temp\\ipykernel_19480\\500100289.py:1: FutureWarning: using in Series.agg cannot aggregate and has been deprecated. Use Series.transform to keep behavior unchanged.\n", + " austin_fixed.Events.str.split(\" , \").agg(len).max()\n" + ] + }, + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "austin_fixed.Events.str.split(\" , \").agg(len).max()" ] }, { @@ -415,14 +1898,403 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempAvgFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercent...VisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHEventsSnowFogRainThunderstorm
02013-12-2174604567.049.043.093.075.057.0...7.02.020.04.031.0Rain , Thunderstorm0000
12013-12-2256483943.036.028.093.068.043.0...10.05.016.06.025.00000
22013-12-2358453231.027.023.076.052.027.0...10.010.08.03.012.00000
32013-12-2461463136.028.021.089.056.022.0...10.07.012.04.020.00000
42013-12-2558504144.040.036.086.071.056.0...10.07.010.02.016.00000
..................................................................
13142017-07-27103897571.067.061.082.054.025.0...10.010.012.05.021.00000
13152017-07-28105917671.064.055.087.054.020.0...10.010.014.05.020.00000
13162017-07-29107927772.064.055.082.051.019.0...10.010.012.04.017.00000
13172017-07-30106937970.068.063.069.048.027.0...10.010.013.04.020.00000
13182017-07-3199887766.061.054.064.043.022.0...10.010.012.04.020.00000
\n", + "

1319 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempAvgF TempLowF DewPointHighF DewPointAvgF \\\n", + "0 2013-12-21 74 60 45 67.0 49.0 \n", + "1 2013-12-22 56 48 39 43.0 36.0 \n", + "2 2013-12-23 58 45 32 31.0 27.0 \n", + "3 2013-12-24 61 46 31 36.0 28.0 \n", + "4 2013-12-25 58 50 41 44.0 40.0 \n", + "... ... ... ... ... ... ... \n", + "1314 2017-07-27 103 89 75 71.0 67.0 \n", + "1315 2017-07-28 105 91 76 71.0 64.0 \n", + "1316 2017-07-29 107 92 77 72.0 64.0 \n", + "1317 2017-07-30 106 93 79 70.0 68.0 \n", + "1318 2017-07-31 99 88 77 66.0 61.0 \n", + "\n", + " DewPointLowF HumidityHighPercent HumidityAvgPercent \\\n", + "0 43.0 93.0 75.0 \n", + "1 28.0 93.0 68.0 \n", + "2 23.0 76.0 52.0 \n", + "3 21.0 89.0 56.0 \n", + "4 36.0 86.0 71.0 \n", + "... ... ... ... \n", + "1314 61.0 82.0 54.0 \n", + "1315 55.0 87.0 54.0 \n", + "1316 55.0 82.0 51.0 \n", + "1317 63.0 69.0 48.0 \n", + "1318 54.0 64.0 43.0 \n", + "\n", + " HumidityLowPercent ... VisibilityAvgMiles VisibilityLowMiles \\\n", + "0 57.0 ... 7.0 2.0 \n", + "1 43.0 ... 10.0 5.0 \n", + "2 27.0 ... 10.0 10.0 \n", + "3 22.0 ... 10.0 7.0 \n", + "4 56.0 ... 10.0 7.0 \n", + "... ... ... ... ... \n", + "1314 25.0 ... 10.0 10.0 \n", + "1315 20.0 ... 10.0 10.0 \n", + "1316 19.0 ... 10.0 10.0 \n", + "1317 27.0 ... 10.0 10.0 \n", + "1318 22.0 ... 10.0 10.0 \n", + "\n", + " WindHighMPH WindAvgMPH WindGustMPH Events Snow Fog \\\n", + "0 20.0 4.0 31.0 Rain , Thunderstorm 0 0 \n", + "1 16.0 6.0 25.0 0 0 \n", + "2 8.0 3.0 12.0 0 0 \n", + "3 12.0 4.0 20.0 0 0 \n", + "4 10.0 2.0 16.0 0 0 \n", + "... ... ... ... ... ... ... \n", + "1314 12.0 5.0 21.0 0 0 \n", + "1315 14.0 5.0 20.0 0 0 \n", + "1316 12.0 4.0 17.0 0 0 \n", + "1317 13.0 4.0 20.0 0 0 \n", + "1318 12.0 4.0 20.0 0 0 \n", + "\n", + " Rain Thunderstorm \n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "... ... ... 
\n", + "1314 0 0 \n", + "1315 0 0 \n", + "1316 0 0 \n", + "1317 0 0 \n", + "1318 0 0 \n", + "\n", + "[1319 rows x 24 columns]" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "event_list = ['Snow', 'Fog', 'Rain', 'Thunderstorm']\n", "\n", "# Your code here\n", - "\n", + "for event in event_list:\n", + " austin_fixed[event] = 0\n", "\n", "# Print your new dataframe to check whether new columns have been created:\n", "\n", @@ -450,7 +2322,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "for event in event_list:\n", + " austin_fixed[event] = austin_fixed[\"Events\"].str.contains(event).astype(int)" ] }, { @@ -462,11 +2335,399 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempAvgFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercent...VisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHEventsSnowFogRainThunderstorm
02013-12-2174604567.049.043.093.075.057.0...7.02.020.04.031.0Rain , Thunderstorm0011
12013-12-2256483943.036.028.093.068.043.0...10.05.016.06.025.00000
22013-12-2358453231.027.023.076.052.027.0...10.010.08.03.012.00000
32013-12-2461463136.028.021.089.056.022.0...10.07.012.04.020.00000
42013-12-2558504144.040.036.086.071.056.0...10.07.010.02.016.00000
..................................................................
13142017-07-27103897571.067.061.082.054.025.0...10.010.012.05.021.00000
13152017-07-28105917671.064.055.087.054.020.0...10.010.014.05.020.00000
13162017-07-29107927772.064.055.082.051.019.0...10.010.012.04.017.00000
13172017-07-30106937970.068.063.069.048.027.0...10.010.013.04.020.00000
13182017-07-3199887766.061.054.064.043.022.0...10.010.012.04.020.00000
\n", + "

1319 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempAvgF TempLowF DewPointHighF DewPointAvgF \\\n", + "0 2013-12-21 74 60 45 67.0 49.0 \n", + "1 2013-12-22 56 48 39 43.0 36.0 \n", + "2 2013-12-23 58 45 32 31.0 27.0 \n", + "3 2013-12-24 61 46 31 36.0 28.0 \n", + "4 2013-12-25 58 50 41 44.0 40.0 \n", + "... ... ... ... ... ... ... \n", + "1314 2017-07-27 103 89 75 71.0 67.0 \n", + "1315 2017-07-28 105 91 76 71.0 64.0 \n", + "1316 2017-07-29 107 92 77 72.0 64.0 \n", + "1317 2017-07-30 106 93 79 70.0 68.0 \n", + "1318 2017-07-31 99 88 77 66.0 61.0 \n", + "\n", + " DewPointLowF HumidityHighPercent HumidityAvgPercent \\\n", + "0 43.0 93.0 75.0 \n", + "1 28.0 93.0 68.0 \n", + "2 23.0 76.0 52.0 \n", + "3 21.0 89.0 56.0 \n", + "4 36.0 86.0 71.0 \n", + "... ... ... ... \n", + "1314 61.0 82.0 54.0 \n", + "1315 55.0 87.0 54.0 \n", + "1316 55.0 82.0 51.0 \n", + "1317 63.0 69.0 48.0 \n", + "1318 54.0 64.0 43.0 \n", + "\n", + " HumidityLowPercent ... VisibilityAvgMiles VisibilityLowMiles \\\n", + "0 57.0 ... 7.0 2.0 \n", + "1 43.0 ... 10.0 5.0 \n", + "2 27.0 ... 10.0 10.0 \n", + "3 22.0 ... 10.0 7.0 \n", + "4 56.0 ... 10.0 7.0 \n", + "... ... ... ... ... \n", + "1314 25.0 ... 10.0 10.0 \n", + "1315 20.0 ... 10.0 10.0 \n", + "1316 19.0 ... 10.0 10.0 \n", + "1317 27.0 ... 10.0 10.0 \n", + "1318 22.0 ... 10.0 10.0 \n", + "\n", + " WindHighMPH WindAvgMPH WindGustMPH Events Snow Fog \\\n", + "0 20.0 4.0 31.0 Rain , Thunderstorm 0 0 \n", + "1 16.0 6.0 25.0 0 0 \n", + "2 8.0 3.0 12.0 0 0 \n", + "3 12.0 4.0 20.0 0 0 \n", + "4 10.0 2.0 16.0 0 0 \n", + "... ... ... ... ... ... ... \n", + "1314 12.0 5.0 21.0 0 0 \n", + "1315 14.0 5.0 20.0 0 0 \n", + "1316 12.0 4.0 17.0 0 0 \n", + "1317 13.0 4.0 20.0 0 0 \n", + "1318 12.0 4.0 20.0 0 0 \n", + "\n", + " Rain Thunderstorm \n", + "0 1 1 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "... ... ... 
\n", + "1314 0 0 \n", + "1315 0 0 \n", + "1316 0 0 \n", + "1317 0 0 \n", + "1318 0 0 \n", + "\n", + "[1319 rows x 24 columns]" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "austin_fixed" ] }, { @@ -478,11 +2739,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "austin_fixed.drop(columns=\"Events\", inplace=True)" ] }, { @@ -500,11 +2761,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 144, "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "austin_fixed[\"Date\"] = austin_fixed[\"Date\"].apply(lambda x: datetime.datetime.strptime(x, \"%Y-%m-%d\").toordinal())" ] }, { @@ -516,9 +2777,216 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempAvgFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercent...VisibilityHighMilesVisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHSnowFogRainThunderstorm
073522374604567.049.043.093.075.057.0...10.07.02.020.04.031.00011
173522456483943.036.028.093.068.043.0...10.010.05.016.06.025.00000
273522558453231.027.023.076.052.027.0...10.010.010.08.03.012.00000
373522661463136.028.021.089.056.022.0...10.010.07.012.04.020.00000
473522758504144.040.036.086.071.056.0...10.010.07.010.02.016.00000
\n", + "

5 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempAvgF TempLowF DewPointHighF DewPointAvgF \\\n", + "0 735223 74 60 45 67.0 49.0 \n", + "1 735224 56 48 39 43.0 36.0 \n", + "2 735225 58 45 32 31.0 27.0 \n", + "3 735226 61 46 31 36.0 28.0 \n", + "4 735227 58 50 41 44.0 40.0 \n", + "\n", + " DewPointLowF HumidityHighPercent HumidityAvgPercent HumidityLowPercent \\\n", + "0 43.0 93.0 75.0 57.0 \n", + "1 28.0 93.0 68.0 43.0 \n", + "2 23.0 76.0 52.0 27.0 \n", + "3 21.0 89.0 56.0 22.0 \n", + "4 36.0 86.0 71.0 56.0 \n", + "\n", + " ... VisibilityHighMiles VisibilityAvgMiles VisibilityLowMiles \\\n", + "0 ... 10.0 7.0 2.0 \n", + "1 ... 10.0 10.0 5.0 \n", + "2 ... 10.0 10.0 10.0 \n", + "3 ... 10.0 10.0 7.0 \n", + "4 ... 10.0 10.0 7.0 \n", + "\n", + " WindHighMPH WindAvgMPH WindGustMPH Snow Fog Rain Thunderstorm \n", + "0 20.0 4.0 31.0 0 0 1 1 \n", + "1 16.0 6.0 25.0 0 0 0 0 \n", + "2 8.0 3.0 12.0 0 0 0 0 \n", + "3 12.0 4.0 20.0 0 0 0 0 \n", + "4 10.0 2.0 16.0 0 0 0 0 \n", + "\n", + "[5 rows x 23 columns]" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "austin_fixed.head(5)" ] @@ -577,11 +3045,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 146, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "y = austin_fixed[\"TempAvgF\"]\n", + "X = austin_fixed.drop(columns=\"TempAvgF\")" ] }, { @@ -593,11 +3062,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ - "#Your code here:\n" + "# 🤖 Machine Learning\n", + "from sklearn.model_selection import train_test_split" ] }, { @@ -616,7 +3086,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Your code here:\n" + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" ] }, { @@ -643,9 +3113,21 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1055.0" + ] 
+ }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "ts_rows = round(len(austin_fixed)*0.8)\n", + "ts_rows" ] }, { @@ -659,9 +3141,398 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercentSeaLevelPressureHighInches...VisibilityHighMilesVisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHSnowFogRainThunderstorm
0735223744567.049.043.093.075.057.029.86...10.07.02.020.04.031.00011
1735224563943.036.028.093.068.043.030.41...10.010.05.016.06.025.00000
2735225583231.027.023.076.052.027.030.56...10.010.010.08.03.012.00000
3735226613136.028.021.089.056.022.030.56...10.010.07.012.04.020.00000
4735227584144.040.036.086.071.056.030.41...10.010.07.010.02.016.00000
..................................................................
1050736273726368.066.059.0100.091.081.030.27...10.06.01.07.02.012.00010
1051736274765967.060.057.093.085.076.030.19...10.010.05.012.02.020.00010
1052736275716169.065.061.0100.094.087.030.14...10.05.01.07.02.012.00010
1053736276696266.064.062.0100.092.084.030.21...10.08.02.010.04.016.00011
1054736277656062.061.058.0100.094.087.030.29...10.06.01.010.06.017.00010
\n", + "

1055 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempLowF DewPointHighF DewPointAvgF DewPointLowF \\\n", + "0 735223 74 45 67.0 49.0 43.0 \n", + "1 735224 56 39 43.0 36.0 28.0 \n", + "2 735225 58 32 31.0 27.0 23.0 \n", + "3 735226 61 31 36.0 28.0 21.0 \n", + "4 735227 58 41 44.0 40.0 36.0 \n", + "... ... ... ... ... ... ... \n", + "1050 736273 72 63 68.0 66.0 59.0 \n", + "1051 736274 76 59 67.0 60.0 57.0 \n", + "1052 736275 71 61 69.0 65.0 61.0 \n", + "1053 736276 69 62 66.0 64.0 62.0 \n", + "1054 736277 65 60 62.0 61.0 58.0 \n", + "\n", + " HumidityHighPercent HumidityAvgPercent HumidityLowPercent \\\n", + "0 93.0 75.0 57.0 \n", + "1 93.0 68.0 43.0 \n", + "2 76.0 52.0 27.0 \n", + "3 89.0 56.0 22.0 \n", + "4 86.0 71.0 56.0 \n", + "... ... ... ... \n", + "1050 100.0 91.0 81.0 \n", + "1051 93.0 85.0 76.0 \n", + "1052 100.0 94.0 87.0 \n", + "1053 100.0 92.0 84.0 \n", + "1054 100.0 94.0 87.0 \n", + "\n", + " SeaLevelPressureHighInches ... VisibilityHighMiles \\\n", + "0 29.86 ... 10.0 \n", + "1 30.41 ... 10.0 \n", + "2 30.56 ... 10.0 \n", + "3 30.56 ... 10.0 \n", + "4 30.41 ... 10.0 \n", + "... ... ... ... \n", + "1050 30.27 ... 10.0 \n", + "1051 30.19 ... 10.0 \n", + "1052 30.14 ... 10.0 \n", + "1053 30.21 ... 10.0 \n", + "1054 30.29 ... 10.0 \n", + "\n", + " VisibilityAvgMiles VisibilityLowMiles WindHighMPH WindAvgMPH \\\n", + "0 7.0 2.0 20.0 4.0 \n", + "1 10.0 5.0 16.0 6.0 \n", + "2 10.0 10.0 8.0 3.0 \n", + "3 10.0 7.0 12.0 4.0 \n", + "4 10.0 7.0 10.0 2.0 \n", + "... ... ... ... ... \n", + "1050 6.0 1.0 7.0 2.0 \n", + "1051 10.0 5.0 12.0 2.0 \n", + "1052 5.0 1.0 7.0 2.0 \n", + "1053 8.0 2.0 10.0 4.0 \n", + "1054 6.0 1.0 10.0 6.0 \n", + "\n", + " WindGustMPH Snow Fog Rain Thunderstorm \n", + "0 31.0 0 0 1 1 \n", + "1 25.0 0 0 0 0 \n", + "2 12.0 0 0 0 0 \n", + "3 20.0 0 0 0 0 \n", + "4 16.0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "1050 12.0 0 0 1 0 \n", + "1051 20.0 0 0 1 0 \n", + "1052 12.0 0 0 1 0 \n", + "1053 16.0 0 0 1 1 \n", + "1054 17.0 0 0 1 0 \n", + "\n", + "[1055 rows x 22 columns]" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_ts_train = X[X.index < ts_rows]\n", + "X_ts_test = X[X.index >= ts_rows]" ] }, { @@ -673,17 +3544,426 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 160, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "y_ts_train = y[y.index < ts_rows]\n", + "y_ts_test = y[y.index >= ts_rows]" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [], + "source": [ + "# This can also be done by passing shuffle=False as a parameter to train_test_split:\n", + "\n", + "X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTempHighFTempLowFDewPointHighFDewPointAvgFDewPointLowFHumidityHighPercentHumidityAvgPercentHumidityLowPercentSeaLevelPressureHighInches...VisibilityHighMilesVisibilityAvgMilesVisibilityLowMilesWindHighMPHWindAvgMPHWindGustMPHSnowFogRainThunderstorm
0TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
1TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
2TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
3TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
4TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
..................................................................
1050TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
1051TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
1052TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
1053TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
1054TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue...TrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
\n", + "

1055 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " Date TempHighF TempLowF DewPointHighF DewPointAvgF DewPointLowF \\\n", + "0 True True True True True True \n", + "1 True True True True True True \n", + "2 True True True True True True \n", + "3 True True True True True True \n", + "4 True True True True True True \n", + "... ... ... ... ... ... ... \n", + "1050 True True True True True True \n", + "1051 True True True True True True \n", + "1052 True True True True True True \n", + "1053 True True True True True True \n", + "1054 True True True True True True \n", + "\n", + " HumidityHighPercent HumidityAvgPercent HumidityLowPercent \\\n", + "0 True True True \n", + "1 True True True \n", + "2 True True True \n", + "3 True True True \n", + "4 True True True \n", + "... ... ... ... \n", + "1050 True True True \n", + "1051 True True True \n", + "1052 True True True \n", + "1053 True True True \n", + "1054 True True True \n", + "\n", + " SeaLevelPressureHighInches ... VisibilityHighMiles \\\n", + "0 True ... True \n", + "1 True ... True \n", + "2 True ... True \n", + "3 True ... True \n", + "4 True ... True \n", + "... ... ... ... \n", + "1050 True ... True \n", + "1051 True ... True \n", + "1052 True ... True \n", + "1053 True ... True \n", + "1054 True ... True \n", + "\n", + " VisibilityAvgMiles VisibilityLowMiles WindHighMPH WindAvgMPH \\\n", + "0 True True True True \n", + "1 True True True True \n", + "2 True True True True \n", + "3 True True True True \n", + "4 True True True True \n", + "... ... ... ... ... \n", + "1050 True True True True \n", + "1051 True True True True \n", + "1052 True True True True \n", + "1053 True True True True \n", + "1054 True True True True \n", + "\n", + " WindGustMPH Snow Fog Rain Thunderstorm \n", + "0 True True True True True \n", + "1 True True True True True \n", + "2 True True True True True \n", + "3 True True True True True \n", + "4 True True True True True \n", + "... ... ... ... ... ... 
\n", + "1050 True True True True True \n", + "1051 True True True True True \n", + "1052 True True True True True \n", + "1053 True True True True True \n", + "1054 True True True True True \n", + "\n", + "[1055 rows x 22 columns]" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train2 == X_ts_train" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -697,7 +3977,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.4" } }, "nbformat": 4,