# Solutions for exercises

## Exercise 1

In [None]:
# the dataset is collected in "countries.csv"

import pandas as pd
# read the countries table into a data frame and display it
d = pd.read_csv(filepath_or_buffer="countries.csv")
d

## Exercise 2

In [None]:
# Countries that joined the EU after 2000 (the "Year" value is strictly greater than 2000).
s = d.loc[d["Year"].gt(2000)]
s

In [None]:
# Countries that joined the EU after 2000 and do not use the Euro:
# both conditions must hold, so the two boolean masks are combined with "&".
s = d.loc[d["Year"].gt(2000) & d["Euro"].eq("No")]
s

In [None]:
# Countries whose "Area" value is above 200,000.
# NOTE(review): the task text says "sq. m."; for country areas the unit is presumably sq. km — confirm against countries.csv.
s = d.loc[d["Area"].gt(200000)]
s

In [None]:
# Countries with a population density below 50 people per unit of area;
# the density is computed on the fly as Population / Area.
# NOTE(review): the task text says "per sq. m."; presumably per sq. km — confirm against countries.csv.
s = d.loc[d["Population"].div(d["Area"]).lt(50)]
s

In [None]:
# another solution: store the density as a new column first, then filter on it
d["Density"] = d["Population"].div(d["Area"])

s = d.loc[d["Density"].lt(50)]
s

## Exercise 3

In [None]:
import matplotlib.pyplot as plt

# split the countries by the year stored in the "Year" column
b2000 = d.loc[d["Year"].lt(2000)]
a2000 = d.loc[d["Year"].ge(2000)]

# one wide figure with a separate bar series (and color) for each group
plt.figure(figsize=(14, 4))
plt.bar(x=b2000["Country"], height=b2000["Area"], color="blue", label="before 2000")
plt.bar(x=a2000["Country"], height=a2000["Area"], color="green", label="2000 or after")

# rotate the country names so that they do not overlap
plt.xticks(rotation=70)
# NOTE(review): the label says "sq.m."; country areas are presumably in sq. km — confirm before changing it
plt.ylabel("Area, sq.m.")
plt.legend()


In [None]:
# population against area, one point per country, with a light dotted grid
plt.scatter(x=d["Area"], y=d["Population"])
plt.ylabel("Population")
plt.xlabel("Area, sq.m")
plt.grid(linestyle=":", color="#e0e0e0")


## Exercise 4

Members
* $TP = 44$ (red apples in the accepted group)
* $FN = 2$ (red apples in the rejected group)
* $\text{Sensitivity} = 44 / (2 + 44) \approx 0.96$ (fraction of red apples correctly accepted)

Strangers
* $TN = 48$ (green apples in the rejected group)
* $FP = 6$ (green apples in the accepted group)
* $\text{Specificity} = 48 / (6 + 48) \approx 0.89$ (fraction of green apples correctly rejected)

Overall
* $\text{Accuracy} = (48 + 44) / (2 + 44 + 6 + 48) = 92 / 100 = 0.92$ (overall fraction of correct decisions)

## Exercise 5

In [None]:
# copy of the function that makes a scatter plot

def iris_scatter(d, x = "PetalLength", y = "PetalWidth", marker = "x"):
    """Draw a scatter plot of two columns of `d`, one colored series per species.

    Parameters
    ----------
    d : data frame with a "Species" column and the columns named by `x` and `y`
    x : name of the column shown on the horizontal axis
    y : name of the column shown on the vertical axis
    marker : marker symbol passed to `plt.scatter`

    The points are drawn on the current pyplot figure. A species value that is
    not a key of the color table below raises KeyError.
    """

    # fixed color for each species label
    palette = {"setosa": "red", "virginica": "blue", "versicolor": "green"}

    labels = d["Species"]

    # one scatter call per distinct species so that each gets its own legend entry
    for name in labels.unique():
        subset = d[labels == name]
        plt.scatter(
            subset[x],
            subset[y],
            color=palette[name],
            label=name,
            marker=marker,
        )

    # decorations: title, axis labels, legend and a light dotted grid
    plt.legend()
    plt.title("Iris dataset")
    plt.xlabel(x)
    plt.ylabel(y)
    plt.grid(linestyle = ":", color = "lightgray")

In [None]:
# copy of the function which computes classification performance statistics
def class_stat(res, target_class):
    """Compute one-vs-rest classification statistics for a single class.

    Parameters
    ----------
    res : pandas.DataFrame
        Table with a "Reference" column (true labels) and a "Prediction"
        column (predicted labels).
    target_class : label
        Class treated as the positive class; every other label is negative.

    Returns
    -------
    dict
        "target" (the class), the counts "TP", "TN", "FP", "FN" as plain
        ints, and the ratios "sens" = TP / (TP + FN),
        "spec" = TN / (TN + FP) and "acc" = (TP + TN) / total.
        A ratio whose denominator is zero (for example sensitivity of a
        class that never occurs in the reference) is NaN instead of
        raising ZeroDivisionError.
    """

    def _ratio(numerator, denominator):
        # an undefined ratio is reported as NaN rather than crashing the caller
        return numerator / denominator if denominator else float("nan")

    ref = res["Reference"]
    pred = res["Prediction"]

    ref_pos, ref_neg = ref == target_class, ref != target_class
    pred_pos, pred_neg = pred == target_class, pred != target_class

    # vectorized counting; int() converts numpy integers to plain Python ints
    TP = int((ref_pos & pred_pos).sum())
    TN = int((ref_neg & pred_neg).sum())
    FP = int((ref_neg & pred_pos).sum())
    FN = int((ref_pos & pred_neg).sum())

    sens = _ratio(TP, TP + FN)
    spec = _ratio(TN, TN + FP)
    acc = _ratio(TP + TN, TP + TN + FP + FN)

    # return all statistics in form of dictionary
    return {
        "target": target_class,
        "TP": TP,
        "TN": TN,
        "FP": FP,
        "FN": FN,
        "sens": sens,
        "spec": spec,
        "acc": acc,
    }

In [None]:
# load data and split to train and test sets

d = pd.read_csv(filepath_or_buffer="Iris.csv")

# rows whose Id is divisible by 5 form the test set; all other rows are used for training
test_ind = d["Id"].mod(5).eq(0)
train_ind = d["Id"].mod(5).ne(0)

d_test = d.loc[test_ind]
d_train = d.loc[train_ind]

In [None]:
# show scatter for petal measurements

# square figure with the training set: petal length against petal width
plt.figure(figsize=(10, 10))
iris_scatter(d_train, "PetalLength", "PetalWidth")

In [None]:
# new classifier with three conditions

def flower_classifier(flower):
    """Predict the species of one flower from its petal measurements.

    `flower` is any mapping-like object indexed by "PetalLength" and
    "PetalWidth". The rules are checked in order:

    1. petal length below 2.5            -> "setosa"
    2. petal width above 1.7, or
       petal length above 5.1            -> "virginica"
    3. everything else                   -> "versicolor"
    """
    if flower["PetalLength"] < 2.5:
        return "setosa"

    # either a wide or a long petal is enough to call the flower virginica
    if flower["PetalWidth"] > 1.7 or flower["PetalLength"] > 5.1:
        return "virginica"

    return "versicolor"

In [None]:
def df_classifier(d):
    """Apply `flower_classifier` to every row of the data frame `d`.

    Returns a list with one predicted label per row, in row order.
    """
    # iterrows() yields (index, row) pairs; only the row is needed here
    return [flower_classifier(row) for _, row in d.iterrows()]

In [None]:
# reference labels and predictions for the test set, collected side by side in one table
ref = d_test["Species"]
pred = df_classifier(d_test)
res = pd.DataFrame(data={"Reference": ref, "Prediction": pred})
res

In [None]:
# one statistics dictionary for every class that occurs in the reference labels
stat = [class_stat(res, class_label) for class_label in ref.unique()]
stat