### Predictive modelling with Python

*Jure Žabkar*

*Thu, 25 April 2024*

---

### Importing data
(Help on [csv](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-read-csv-table) and [excel](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-excel-reader))

From a **local CSV file** (here the Iris dataset), loaded with pandas, e.g.:

In [None]:
import pandas as pd
# Load the Iris table from a CSV file; the path is relative to the
# current working directory.
iris = pd.read_csv('datasets/Iris.csv')

In [None]:
# Show the loaded DataFrame (a notebook renders a cell's last expression).
iris

In [None]:
# Preview the first rows of the table.
iris.head()

In [None]:
# Print a structural summary: column names, dtypes and non-null counts.
iris.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the numeric columns.
iris.describe()

In [None]:
from sklearn import tree
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Petal length against petal width, one colour per species.
petal_axes = sns.scatterplot(
    data=iris,
    x='PetalLengthCm',
    y='PetalWidthCm',
    hue='Species',
)
# Pin the legend to the top-left corner of the axes.
petal_axes.legend(loc='upper left')
plt.show()

In [None]:
# Pairwise scatter plots of every measurement, coloured by species.
# The Id column is dropped first (presumably just a row identifier, not a feature).
sns.pairplot(iris.drop(columns=['Id']), hue='Species', height=2)

In [None]:
# One histogram grid per measurement, with the species overlaid by colour.
for measurement in ("SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"):
    plot = sns.FacetGrid(iris, hue="Species")
    plot.map(sns.histplot, measurement).add_legend()

plt.show()

In [None]:
# 2x2 grid of box plots: the distribution of each measurement per species.
plt.figure(figsize=(10, 10))

for position, measurement in enumerate(
    ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
    start=1,
):
    # Subplot positions are 1-based and fill the grid row by row.
    plt.subplot(2, 2, position)
    sns.boxplot(x="Species", y=measurement, data=iris)

plt.show()

In [None]:
# Fit an entropy-based decision tree on the iris measurements.
#
# The positional slices drop the first and the last column; this assumes the
# CSV layout is Id, <measurement columns...>, Species -- confirm if the file
# layout ever changes.
feature_names = list(iris.columns[1:-1])
X = iris[feature_names]
y = iris[iris.columns[-1]]

clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(X, y)

# plot_tree needs one label per class, in the same (sorted) order the fitted
# classifier uses. The previous value, iris.columns[-1], was only the name of
# the target column (a single string), so the tree nodes ended up labelled
# with its individual characters or the call was rejected outright.
class_names = [str(label) for label in clf.classes_]

In [None]:
# Draw the fitted tree on a wide canvas. `fig` is kept as a name so the
# figure can still be saved after it has been shown.
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(1, 1, 1)
tree.plot_tree(
    clf,
    feature_names=feature_names,
    class_names=class_names,
    filled=True,  # colour nodes by their majority class
    ax=ax,
)
plt.show()

In [None]:
# Write the tree figure to a PNG file in the current working directory.
fig.savefig("iris_tree.png")