In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import requests

# Download the raw Iris data file from the UCI repository and cache it on disk.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, allow_redirects=True, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as 'iris.data'
# and then trying to parse it as CSV.
r.raise_for_status()

with open('iris.data', 'wb') as f:
    f.write(r.content)

# Column names are passed explicitly via `names=`, so the first line of the
# file is read as data rather than as a header.
columns = ['Sepal length', 'Sepal width', 'Petal length', 'Petal width', 'Class_labels']

df = pd.read_csv('iris.data', names=columns)
print(df.head())
print(df.columns)

print(df.describe())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()

   Sepal length  Sepal width  Petal length  Petal width Class_labels
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa
Index(['Sepal length', 'Sepal width', 'Petal length', 'Petal width',
       'Class_labels'],
      dtype='object')
       Sepal length  Sepal width  Petal length  Petal width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     

In [None]:
# Show the frame's dimensions, then look up individual rows by index label.
# Only the last expression of a cell is displayed automatically, so a bare
# `df.shape` on the first line would be evaluated and silently discarded;
# print it explicitly instead.
print(df.shape)
print(df.loc[0])
print(df.loc[100])


Sepal length            5.1
Sepal width             3.5
Petal length            1.4
Petal width             0.2
Class_labels    Iris-setosa
Name: 0, dtype: object
Sepal length               6.3
Sepal width                3.3
Petal length               6.0
Petal width                2.5
Class_labels    Iris-virginica
Name: 100, dtype: object


In [None]:
# Label-based lookup: passing a list of labels returns those rows as a DataFrame.
print(df.loc[[10, 100], :])

     Sepal length  Sepal width  Petal length  Petal width    Class_labels
10            5.4          3.7           1.5          0.2     Iris-setosa
100           6.3          3.3           6.0          2.5  Iris-virginica


In [None]:
# Keep every row but only the two sepal measurement columns.
subset = df[['Sepal length', 'Sepal width']]
print(subset.head(n=5))

   Sepal length  Sepal width
0           5.1          3.5
1           4.9          3.0
2           4.7          3.2
3           4.6          3.1
4           5.0          3.6


In [None]:
# Position-based column selection with iloc; negative positions count from the end.
# NOTE(review): positions 4 and -1 both refer to the last column, so
# 'Class_labels' appears twice in the result -- confirm this is intentional.
subset2 = df.iloc[:, [2, 4, -1]]
print(subset2.head(n=5))

   Petal length Class_labels Class_labels
0           1.4  Iris-setosa  Iris-setosa
1           1.4  Iris-setosa  Iris-setosa
2           1.3  Iris-setosa  Iris-setosa
3           1.5  Iris-setosa  Iris-setosa
4           1.4  Iris-setosa  Iris-setosa


In [None]:
# Column positions 0 through 4, used for positional selection.
list1 = [*range(5)]
print(list1)

[0, 1, 2, 3, 4]


In [None]:
# Select all rows and the columns at the positions listed in `list1`.
subset3 = df.iloc[:, list1]
print(subset3.head(n=5))

   Sepal length  Sepal width  Petal length  Petal width Class_labels
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [None]:
# Single-cell lookups: first by (row label, column name), then by
# (row position, column position). Each value is printed on its own line.
print(df.loc[20, 'Sepal width'], df.iloc[20, 2], sep='\n')

3.4
1.7


In [None]:
# Positional selection of specific rows crossed with specific columns.
print(
    df.iloc[
        [0, 99, 109],  # row positions
        [0, 3],        # column positions
    ]
)

     Sepal length  Petal width
0             5.1          0.2
99            5.7          1.3
109           7.2          2.5


In [None]:
# Count missing values per column (isna is the same check as isnull).
print(df.isna().sum())

Sepal length    0
Sepal width     0
Petal length    0
Petal width     0
Class_labels    0
dtype: int64


In [None]:
# Blank out a few cells so there are missing values to work with.
# .loc slices are label-based and include both endpoints, so each
# assignment below clears six rows.
df.loc[10:15, 'Sepal length'] = np.nan
df.loc[20:25, 'Petal width'] = np.nan

# Confirm how many values are now missing in each column.
print(df.isna().sum())

Sepal length    6
Sepal width     0
Petal length    0
Petal width     6
Class_labels    0
dtype: int64


In [None]:
# Replace the missing values in each affected column with that column's mean.
# The filled Series is assigned back to the column rather than calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form operates on
# an intermediate object, raises a FutureWarning, and stops modifying `df`
# in pandas 3.0.
for column in ('Sepal length', 'Petal width'):
    df[column] = df[column].fillna(df[column].mean())
print(df.isnull().sum())
# Alternative to imputing: df.dropna() would discard the incomplete rows instead.

Sepal length    0
Sepal width     0
Petal length    0
Petal width     0
Class_labels    0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Sepal length'].fillna(df["Sepal length"].mean(),inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Petal width'].fillna(df["Petal width"].mean(),inplace = True)


In [None]:
# Ensure the two columns that were imputed are stored with a float dtype.
for column in ('Sepal length', 'Petal width'):
    df[column] = df[column].astype(float)
print(df.head(n=5))

   Sepal length  Sepal width  Petal length  Petal width Class_labels
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [None]:
# Encode the species names as integer class ids.
label_codes = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
# Assign the encoded Series back to the column instead of calling
# `df[col].replace(..., inplace=True)`: the chained in-place form acts on an
# intermediate object and no longer updates `df` in pandas 3.0.
# Values without an entry in `label_codes` (including ids that were already
# encoded by a previous run of this cell) pass through unchanged, matching
# what `replace` did.
df['Class_labels'] = df['Class_labels'].map(lambda label: label_codes.get(label, label))
df.head()

Unnamed: 0,Sepal length,Sepal width,Petal length,Petal width,Class_labels
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
