import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes=True)
%matplotlib inline
df = pd.read_csv('./iris.data')
df.head(5)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa | |
---|---|---|---|---|---|
0 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
1 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
2 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
3 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
4 | 5.4 | 3.9 | 1.7 | 0.4 | Iris-setosa |
col_name = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']
df.columns = col_name
df.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
sepal length | sepal width | petal length | petal width | class | |
---|---|---|---|---|---|
0 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
1 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
2 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
3 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
4 | 5.4 | 3.9 | 1.7 | 0.4 | Iris-setosa |
iris = sns.load_dataset('iris')
iris.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
df.describe()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
sepal length | sepal width | petal length | petal width | |
---|---|---|---|---|
count | 149.000000 | 149.000000 | 149.000000 | 149.000000 |
mean | 5.848322 | 3.051007 | 3.774497 | 1.205369 |
std | 0.828594 | 0.433499 | 1.759651 | 0.761292 |
min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
50% | 5.800000 | 3.000000 | 4.400000 | 1.300000 |
75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
iris.describe()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
sepal_length | sepal_width | petal_length | petal_width | |
---|---|---|---|---|
count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
print(iris.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sepal_length 150 non-null float64
1 sepal_width 150 non-null float64
2 petal_length 150 non-null float64
3 petal_width 150 non-null float64
4 species 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None
print(iris.groupby('species').size())
species
setosa 50
versicolor 50
virginica 50
dtype: int64
print(iris.all())
sepal_length True
sepal_width True
petal_length True
petal_width True
species True
dtype: bool
iris.count()
sepal_length 150
sepal_width 150
petal_length 150
petal_width 150
species 150
dtype: int64
sns.pairplot(iris, hue='species', height=2, aspect=1);
plt.show()
iris.hist(edgecolor='white', linewidth=2, figsize=(12, 8))
plt.show()
plt.figure(figsize=(12,8))
plt.subplot(2,2,1)
sns.violinplot(x='species', y='sepal_length', data=iris)
plt.subplot(2,2,2)
sns.violinplot(x='species', y='sepal_width', data=iris)
plt.subplot(2,2,3)
sns.violinplot(x='species', y='petal_length', data=iris)
plt.subplot(2,2,4)
sns.violinplot(x='species', y='petal_width', data=iris)
plt.show()
iris.boxplot(by='species', figsize=(12,8))
plt.show()
pd.plotting.scatter_matrix(iris, figsize=(12,8))
plt.show()