In [2]:
import pandas as pd
from sklearn.datasets import load_iris

In [3]:
# Load the Iris dataset that ships with scikit-learn (no download or file path needed).
# The returned object is read later through its .data, .feature_names and .target attributes.
iris = load_iris()

In [4]:
# Create DataFrame
# Measurements become the columns (named after iris.feature_names); the class
# label is appended as a trailing 'target' column in the same expression.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Header and preview go out through one print call, one item per line.
print("First 5 rows:", df.head(), sep="\n")

First 5 rows:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


In [5]:
# Show the final rows of the frame (tail() returns the last five by default).
closing_rows = df.tail()
print("\nLast 5 rows:", closing_rows, sep="\n")


Last 5 rows:
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
145                6.7               3.0                5.2               2.3   
146                6.3               2.5                5.0               1.9   
147                6.5               3.0                5.2               2.0   
148                6.2               3.4                5.4               2.3   
149                5.9               3.0                5.1               1.8   

     target  
145       2  
146       2  
147       2  
148       2  
149       2  


In [6]:
# df.shape is a (row_count, column_count) tuple.
print("\nShape of dataset:", df.shape, sep="\n")


Shape of dataset:
(150, 5)


In [7]:
# List the column labels as the Index object pandas stores them in.
column_index = df.columns
print("\nColumn names:", column_index, sep="\n")


Column names:
Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')


In [8]:
# Report the storage dtype of every column.
print("\nData types:", df.dtypes, sep="\n")


Data types:
sepal length (cm)    float64
sepal width (cm)     float64
petal length (cm)    float64
petal width (cm)     float64
target                 int64
dtype: object


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
summary_stats = df.describe()
print("\nBasic statistics:", summary_stats, sep="\n")


Basic statistics:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.057333           3.758000   
std             0.828066          0.435866           1.765298   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%             5.800000          3.000000           4.350000   
75%             6.400000          3.300000           5.100000   
max             7.900000          4.400000           6.900000   

       petal width (cm)      target  
count        150.000000  150.000000  
mean           1.199333    1.000000  
std            0.762238    0.819232  
min            0.100000    0.000000  
25%            0.300000    0.000000  
50%            1.300000    1.000000  
75%            1.800000    2.000000  
max            2.500000    2.000000  


In [10]:
# Count null entries per column: isnull() marks each cell, sum() totals the marks column-wise.
null_counts = df.isnull().sum()
print("\nChecking for missing values:", null_counts, sep="\n")


Checking for missing values:
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64


In [11]:
# Selecting columns
# Name the labels once so both selections below share the same spelling.
sepal_length_col = 'sepal length (cm)'
petal_length_col = 'petal length (cm)'

# Indexing with a single label gives back a Series.
print("\nSelecting a single column:", df[sepal_length_col].head(), sep="\n")

# Indexing with a list of labels gives back a DataFrame holding just those columns.
print("\nSelecting multiple columns:", df[[sepal_length_col, petal_length_col]].head(), sep="\n")



Selecting a single column:
0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: sepal length (cm), dtype: float64

Selecting multiple columns:
   sepal length (cm)  petal length (cm)
0                5.1                1.4
1                4.9                1.4
2                4.7                1.3
3                4.6                1.5
4                5.0                1.4


In [12]:
# Filtering rows
# Build the boolean mask first, then use it to keep only the matching rows.
is_class_zero = df['target'].eq(0)
print("\nFilter rows where target == 0:", df.loc[is_class_zero].head(), sep="\n")


Filter rows where target == 0:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


In [13]:
# Adding a new column
sepal_length = df['sepal length (cm)']
sepal_width = df['sepal width (cm)']

# Element-wise product of the two sepal measurements, stored on df itself so
# that every later cell sees the extra column.
df['sepal_area'] = sepal_length * sepal_width
print("\nNew column added:", df.head(), sep="\n")


New column added:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  sepal_area  
0       0       17.85  
1       0       14.70  
2       0       15.04  
3       0       14.26  
4       0       18.00  


In [14]:
# GroupBy
# One row per distinct target value, holding the mean of every other column.
per_class_means = df.groupby('target').mean()
print("\nGroup by target and calculate mean:", per_class_means, sep="\n")


Group by target and calculate mean:
        sepal length (cm)  sepal width (cm)  petal length (cm)  \
target                                                           
0                   5.006             3.428              1.462   
1                   5.936             2.770              4.260   
2                   6.588             2.974              5.552   

        petal width (cm)  sepal_area  
target                                
0                  0.246     17.2578  
1                  1.326     16.5262  
2                  2.026     19.6846  


In [15]:

# Sorting
# Order a copy of the frame from longest to shortest sepal; df itself is left as-is.
longest_first = df.sort_values(by='sepal length (cm)', ascending=False)
print("\nSorted by sepal length:", longest_first.head(), sep="\n")


Sorted by sepal length:
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
131                7.9               3.8                6.4               2.0   
122                7.7               2.8                6.7               2.0   
118                7.7               2.6                6.9               2.3   
117                7.7               3.8                6.7               2.2   
135                7.7               3.0                6.1               2.3   

     target  sepal_area  
131       2       30.02  
122       2       21.56  
118       2       20.02  
117       2       29.26  
135       2       23.10  


In [16]:
# Value counts
# Number of rows carrying each distinct target label.
class_counts = df['target'].value_counts()
print("\nTarget value counts:", class_counts, sep="\n")


Target value counts:
target
0    50
1    50
2    50
Name: count, dtype: int64


In [18]:

# Saving to CSV
# Write the frame (including the derived sepal_area column) to the working
# directory; index=False keeps the row labels out of the file.
df.to_csv(path_or_buf="iris_processed.csv", index=False)
print("\nDataset saved as iris_processed.csv")


Dataset saved as iris_processed.csv
