In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Let's start with some simple plots with Matplotlib!

In [None]:
### scatter plot

# Five sample points: x positions and their matching y values
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 7, 9]

# Draw the points on a fresh figure
fig, ax = plt.subplots()
ax.scatter(x, y)

# Label both axes and give the figure a title
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Simple Scatter Plot')

# Render the figure
plt.show()

In [None]:
### Pie chart

# Fruit names and the size of the wedge that belongs to each one
labels = ['Apples', 'Bananas', 'Cherries', 'Dates']
sizes = [30, 40, 10, 20]

# One wedge per entry in `sizes`, annotated with the matching label
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels)
ax.set_title('Simple Pie Chart')

# Render the figure
plt.show()

In [None]:
### Custom colors

# Same fruit data as in the plain pie chart, plus one color per wedge
labels = ['Apples', 'Bananas', 'Cherries', 'Dates']
sizes = [30, 40, 10, 20]
# Colors can be written as names (['green', 'orange', 'red', 'brown']),
# as single-letter codes (['g', 'm', 'r', 'b']), or as hex strings like here:
colors = ['#E01FDD', '#E0821F', '#1FE022', '#1F7DE0']

# Draw the wedges with the chosen colors
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, colors=colors)
ax.set_title('Simple Pie Chart')

# Add a legend that lists the fruit names
ax.legend(labels)

# Render the figure
plt.show()

In [None]:
# Create the bar chart: one bar per fruit, reusing the `labels` and `sizes`
# lists defined in the pie-chart cells
fig, ax = plt.subplots()
ax.bar(labels, sizes)

# Axis labels and title in a single call
ax.set(xlabel='Fruit', ylabel='Amount', title='Simple Bar Chart')

# Render the figure
plt.show()

In [None]:
### Line plots
# 100 evenly spaced x values between 0 and 10, and the sine of each of them
x = np.linspace(0, 10, 100)  # x is a NumPy array
y = np.sin(x)  # y is an array with the same length as x

# Draw y against x as a connected line
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set(xlabel='X axis', ylabel='Y axis', title='Sine Wave')
plt.show()

In [None]:
### Several plots in one
# One shared x grid and two curves evaluated on it
x = np.linspace(0, 10, 100)  # x is a NumPy array
y1 = np.sin(x)  # sine curve, same length as x
y2 = np.cos(x)  # cosine curve, same length as x

# Plotting on the same axes twice overlays both curves in one figure
fig, ax = plt.subplots()
for curve in (y1, y2):
    ax.plot(x, curve)
ax.set(xlabel='X axis', ylabel='Y axis', title='Sine and Cos Wave')
plt.show()

In [None]:
### Picking colors and adding legends
# Generate some data
x = np.linspace(0, 10, 100)  # x is a NumPy array
y1 = np.sin(x)  # sine curve, same length as x
y2 = np.cos(x)  # cosine curve, same length as x

# Plot the data with Matplotlib.
# Each line carries its own legend text via `label=`, so the legend entry is
# tied to the line itself rather than to the order of a separate list.
plt.plot(x, y1, color='red', label='sin')
plt.plot(x, y2, color='blue', label='cos')
plt.xlabel('X axis')
plt.ylabel('Y axis')
plt.legend()  # picks up the labels attached to the lines above
plt.title('Sine and Cos Wave')
plt.show()

In [None]:
### Trendlines (line plot and scatter plot in the same plot)

# generate some example data; a seeded generator makes the figure identical
# on every run (Restart Kernel -> Run All gives the same plot)
rng = np.random.default_rng(42)
x = np.arange(10)
y = rng.random(10)

# plot the scatter plot; `label=` ties the legend text to these points
plt.scatter(x, y, c='blue', label='data')

# perform linear regression: a degree-1 polynomial fit returns the
# coefficients highest power first, i.e. slope and then intercept
slope, intercept = np.polyfit(x, y, 1)

# plot the trendline
plt.plot(x, slope * x + intercept, c='r', label='trendline')

# add labels and a legend; the legend reads the labels attached to each
# artist, so the scatter/line entries cannot be mixed up by ordering
plt.xlabel('x')
plt.ylabel('y')
plt.legend()

# display the plot
plt.show()

### Let's explore a dataset!

In [None]:
## load the iris dataset

# Column names are supplied explicitly through `names`, so pandas treats
# every line of the file as data instead of using the first line as a header.
iris_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris = pd.read_csv(iris_url, names=iris_columns)

In [None]:
## inspect it
# Print a summary of the DataFrame
iris.info()
# Show the first rows; as the last expression of the cell it is rendered as the cell output
iris.head()

## TO DO: create a scatter plot, with sepal_length and petal_length on the x- and y-axis, respectively.

#### - What parameters can you change to make the plot look different? (https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html)

#### - extra: Can you change the size of the dot so that it corresponds to petal_width?
#### - Can you change the color so that every class has its own color?


In [None]:
### your code here ....


## TO DO: create a pie chart showing the number of samples in each class.
#### Transform the same data into a bar chart. 

In [None]:
## your code here ....


## 3D data

In [None]:
# Load seaborn's "flights" example dataset; the cells below use its
# `month`, `year` and `passengers` columns
flights = sns.load_dataset("flights")

In [None]:
# Display the flights table as loaded (before it is pivoted further down)
flights

In [None]:
# Bar chart of the `passengers` column against the `month` column,
# taken straight from the un-pivoted table
fig, ax = plt.subplots()
ax.bar(flights['month'], flights['passengers'])
plt.show()

In [None]:
# Reshape the table into a grid: one row per month, one column per year,
# passenger counts as the cell values.
# The arguments are passed by keyword because pandas 2.0 made the parameters
# of DataFrame.pivot keyword-only; the positional form raises a TypeError there.
# Note: this overwrites `flights`, so re-run the loading cell before running
# this cell a second time.
flights = flights.pivot(index="month", columns="year", values="passengers")

In [None]:
# Display the pivoted table: months as rows, years as columns, passenger counts as values
flights

In [None]:
# Show the month x year grid as an image: each cell is colored by its
# passenger count, and the colorbar maps colors back to values
fig, ax = plt.subplots()
heat = ax.imshow(flights, cmap="YlGnBu")
fig.colorbar(heat, ax=ax)
ax.set(xlabel="Year", ylabel="Month", title="Passenger Traffic")
plt.show()

In [None]:
# Row labels (months) and column labels (years) of the pivoted table
months = flights.index.values
years = flights.columns.values

# Same image as before ...
fig, ax = plt.subplots()
heat = ax.imshow(flights, cmap="YlGnBu")
fig.colorbar(heat, ax=ax)
ax.set_xlabel("Year")
ax.set_ylabel("Month")

# ... but with one tick per column/row, labelled with the real year and
# month instead of the bare array position
ax.set_xticks(range(len(years)))
ax.set_xticklabels(years, rotation=-30)
ax.set_yticks(range(len(months)))
ax.set_yticklabels(months)

ax.set_title("Passenger Traffic")
plt.show()

# Part 2:

### Using seaborn to explore (large) datasets

In [None]:
# Show the first rows of the iris DataFrame
iris.head()

In [None]:
## distributions
# Histogram of the values in the petal_length column
fig, ax = plt.subplots()
sns.histplot(data=iris, x='petal_length', ax=ax)
plt.show()

In [None]:
# density distribution: kernel density estimate of the petal_length column
sns.displot(data=iris, x="petal_length", kind="kde")
# End with plt.show() like the other plotting cells, so the cell shows only
# the figure and not the repr of the object returned by displot
plt.show()

In [None]:
# calculate the correlation matrix
# Only the numeric columns are used: `class` holds text labels, and on
# pandas >= 2.0 calling corr() on a frame with a non-numeric column raises
# a ValueError instead of silently skipping that column.
corr = iris.select_dtypes(include="number").corr()

# visualize it as a heat plot; annot=True writes each coefficient into its cell

sns.heatmap(corr, annot=True)
plt.show()

In [None]:
# Same heat map of `corr`, with the color scale pinned to [-1, 1]
# (vmin/vmax) and a different colormap
heatmap_options = dict(annot=True, cmap='coolwarm', vmin=-1, vmax=1)

sns.heatmap(corr, **heatmap_options)
plt.show()

In [None]:
# Scatter plot of sepal width against petal length for the whole dataset
x = iris['petal_length']
y = iris['sepal_width']

plt.scatter(x, y)
# Label the axes so the figure can be read on its own
plt.xlabel('Petal Length')
plt.ylabel('Sepal Width')
plt.show()

In [None]:
## plot only part of the dataset

# plotting only points with petal_length < 2:
# build the boolean row filter once and reuse it for both axes
short_petals = iris["petal_length"] < 2

plt.scatter(x=iris.loc[short_petals, "petal_length"],
            y=iris.loc[short_petals, "sepal_length"])

# x holds petal length and y holds sepal length, so label them accordingly
plt.xlabel("Petal Length")
plt.ylabel("Sepal Length")
plt.title("Scatter plot of Sepal Length vs. Petal Length")
plt.show()

In [None]:
## plot only part of the dataset

## ... or plot just a sample of 20 rows

# A fixed random_state draws the same 20 rows on every run, so the figure
# is reproducible; change or remove it to see a different sample
subset = iris.sample(n=20, random_state=42)

# Create a scatter plot of sepal length vs. sepal width for the subset of data
plt.scatter(subset["sepal_length"], subset["sepal_width"])

# Set the title and axis labels
plt.title("Random Subset of Iris Data")
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")

# Display the plot
plt.show()

In [None]:
# pairplot draws pairwise plots between all the numerical variables in a
# dataset; `hue` colors the points by the value of the `class` column
sns.pairplot(data=iris, hue="class")
plt.show()

## Try it yourself!
### using the "wine" dataset (or any dataset of your choice)

### - inspect the dataset, number of features, distribution, ...
### - find two features that have high correlation. Plot them as a scatter plot. What can you see?
### - find two features that have low correlation. Plot them as a scatter plot. What is different here?
### - plot a subset of the data, use filters.

In [None]:
# Column names for the wine data: the class label first, then the measured features
col_names = ['Class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols',
             'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue',
             'OD280/OD315 of diluted wines', 'Proline']

# header=None tells pandas that the first line is data, and `names`
# assigns the column names while the file is being read
wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
                   header=None, names=col_names)
