# Wine

### Introduction:

This exercise is a adaptation from the UCI Wine dataset.
The only pupose is to practice deleting data with pandas.

### Step 1. Import the necessary libraries

In [8]:
import numpy as np
import pandas as pd

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). 

In [3]:
import 
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
data = pd.read_csv(url, header=None)
print(data.head())

   0      1     2     3     4    5     6     7     8     9     10    11    12  \
0   1  14.23  1.71  2.43  15.6  127  2.80  3.06  0.28  2.29  5.64  1.04  3.92   
1   1  13.20  1.78  2.14  11.2  100  2.65  2.76  0.26  1.28  4.38  1.05  3.40   
2   1  13.16  2.36  2.67  18.6  101  2.80  3.24  0.30  2.81  5.68  1.03  3.17   
3   1  14.37  1.95  2.50  16.8  113  3.85  3.49  0.24  2.18  7.80  0.86  3.45   
4   1  13.24  2.59  2.87  21.0  118  2.80  2.69  0.39  1.82  4.32  1.04  2.93   

     13  
0  1065  
1  1050  
2  1185  
3  1480  
4   735  


### Step 3. Assign it to a variable called wine

In [4]:
wine = data.copy()
print(wine.head())

   0      1     2     3     4    5     6     7     8     9     10    11    12  \
0   1  14.23  1.71  2.43  15.6  127  2.80  3.06  0.28  2.29  5.64  1.04  3.92   
1   1  13.20  1.78  2.14  11.2  100  2.65  2.76  0.26  1.28  4.38  1.05  3.40   
2   1  13.16  2.36  2.67  18.6  101  2.80  3.24  0.30  2.81  5.68  1.03  3.17   
3   1  14.37  1.95  2.50  16.8  113  3.85  3.49  0.24  2.18  7.80  0.86  3.45   
4   1  13.24  2.59  2.87  21.0  118  2.80  2.69  0.39  1.82  4.32  1.04  2.93   

     13  
0  1065  
1  1050  
2  1185  
3  1480  
4   735  


### Step 4. Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns

In [5]:
columns_to_drop = [0, 3, 6, 8, 10, 12, 13]
wine = wine.drop(columns=columns_to_drop)
print(wine.head())

      1     2     4    5     7     9     11
0  14.23  1.71  15.6  127  3.06  2.29  1.04
1  13.20  1.78  11.2  100  2.76  1.28  1.05
2  13.16  2.36  18.6  101  3.24  2.81  1.03
3  14.37  1.95  16.8  113  3.49  2.18  0.86
4  13.24  2.59  21.0  118  2.69  1.82  1.04


### Step 5. Assign the columns as below:

The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it):  
1) alcohol  
2) malic_acid  
3) alcalinity_of_ash  
4) magnesium  
5) flavanoids  
6) proanthocyanins  
7) hue 

In [6]:
new_column_names = ["alcohol", "malic_acid", "alcalinity_of_ash", "magnesium",
                    "flavanoids", "proanthocyanins", "hue"]
wine.columns = new_column_names
print(wine.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0    14.23        1.71               15.6        127        3.06   
1    13.20        1.78               11.2        100        2.76   
2    13.16        2.36               18.6        101        3.24   
3    14.37        1.95               16.8        113        3.49   
4    13.24        2.59               21.0        118        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 6. Set the values of the first 3 rows from alcohol as NaN

In [14]:
wine.iloc[:3, wine.columns.get_loc('alcohol')] = np.nan
print(wine.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      NaN        1.71               15.6      127.0        3.06   
1      NaN        1.78               11.2      100.0        2.76   
2      NaN        2.36               18.6        NaN        3.24   
3    14.37        1.95               16.8        NaN        3.49   
4    13.24        2.59               21.0        NaN        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN

In [15]:
wine.iloc[2:4, wine.columns.get_loc('magnesium')] = np.nan
print(wine.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      NaN        1.71               15.6      127.0        3.06   
1      NaN        1.78               11.2      100.0        2.76   
2      NaN        2.36               18.6        NaN        3.24   
3    14.37        1.95               16.8        NaN        3.49   
4    13.24        2.59               21.0        NaN        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium

In [16]:
wine['alcohol'].fillna(10, inplace=True)
wine['magnesium'].fillna(100, inplace=True)

print(wine.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0    10.00        1.71               15.6      127.0        3.06   
1    10.00        1.78               11.2      100.0        2.76   
2    10.00        2.36               18.6      100.0        3.24   
3    14.37        1.95               16.8      100.0        3.49   
4    13.24        2.59               21.0      100.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 9. Count the number of missing values

In [17]:
missing_values = wine.isnull().sum()
print(missing_values)

alcohol              0
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64


### Step 10.  Create an array of 10 random numbers up until 10

In [18]:
random_numbers = np.random.randint(0, 11, size=10)
print(random_numbers)


[3 5 0 4 6 2 5 0 8 0]


### Step 11.  Set the rows of the random numbers in the column

In [19]:
wine.loc[:9, 'alcohol'] = random_numbers

print(wine.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      3.0        1.71               15.6      127.0        3.06   
1      5.0        1.78               11.2      100.0        2.76   
2      0.0        2.36               18.6      100.0        3.24   
3      4.0        1.95               16.8      100.0        3.49   
4      6.0        2.59               21.0      100.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 12.  How many missing values do we have?

In [20]:
missing_values = wine.isnull().sum()
print(missing_values)


alcohol              0
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64


### Step 14. Print only the non-null values in alcohol

In [21]:
print(wine['alcohol'].dropna())

0       3.00
1       5.00
2       0.00
3       4.00
4       6.00
       ...  
173    13.71
174    13.40
175    13.27
176    13.17
177    14.13
Name: alcohol, Length: 178, dtype: float64


### Step 13. Delete the rows that contain missing values

In [22]:
wine_cleaned = wine.dropna()

print(wine_cleaned.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      3.0        1.71               15.6      127.0        3.06   
1      5.0        1.78               11.2      100.0        2.76   
2      0.0        2.36               18.6      100.0        3.24   
3      4.0        1.95               16.8      100.0        3.49   
4      6.0        2.59               21.0      100.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 15.  Reset the index, so it starts with 0 again

In [23]:
wine_cleaned = wine_cleaned.reset_index(drop=True)
print(wine_cleaned.head())

   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      3.0        1.71               15.6      127.0        3.06   
1      5.0        1.78               11.2      100.0        2.76   
2      0.0        2.36               18.6      100.0        3.24   
3      4.0        1.95               16.8      100.0        3.49   
4      6.0        2.59               21.0      100.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### BONUS: Create your own question and answer it.

In [27]:
# change display 
pd.set_option('display.width', 7)
pd.set_option('display.max_columns', None)
print(wine)

     alcohol  \
0       3.00   
1       5.00   
2       0.00   
3       4.00   
4       6.00   
..       ...   
173    13.71   
174    13.40   
175    13.27   
176    13.17   
177    14.13   

     malic_acid  \
0          1.71   
1          1.78   
2          2.36   
3          1.95   
4          2.59   
..          ...   
173        5.65   
174        3.91   
175        4.28   
176        2.59   
177        4.10   

     alcalinity_of_ash  \
0                 15.6   
1                 11.2   
2                 18.6   
3                 16.8   
4                 21.0   
..                 ...   
173               20.5   
174               23.0   
175               20.0   
176               20.0   
177               24.5   

     magnesium  \
0        127.0   
1        100.0   
2        100.0   
3        100.0   
4        100.0   
..         ...   
173       95.0   
174      102.0   
175      120.0   
176      120.0   
177       96.0   

     flavanoids  \
0          3.06   
1         