# MATPLOTLIB: the module to plot data 

In [None]:
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Sample 100 evenly spaced points on [0, 10] and evaluate two cosine curves,
# the first one shifted by 10 along the x axis.
x = np.linspace(start=0, stop=10, num=100)
y1 = np.cos(x - 10)
y2 = np.cos(x)

# The Figure object is the container that holds everything drawn below.
fig = plt.figure()

# Solid line for y1, dashed line for y2.
plt.plot(x, y1, linestyle='-')
plt.plot(x, y2, linestyle='--');

In [None]:
# save fig
fig.savefig('my_figure.png')

In [None]:
!ls -lh my_figure.png

In [None]:
!pwd

We can load the saved image back and display it in the notebook

In [None]:
from IPython.display import Image
Image('my_figure.png')

We can change line styles and colors

In [None]:
# Each curve is shifted up by one unit so the eight lines stay apart.
# The first four entries are the long style names; the last four are the
# equivalent short codes for solid, dashed, dashdot and dotted.
line_styles = ['solid', 'dashed', 'dashdot', 'dotted', '-', '--', '-.', ':']
for offset, style in enumerate(line_styles):
    plt.plot(x, x + offset, linestyle=style)

In [None]:
plt.plot(x, np.sin(x - 0), linestyle='dashed',color='blue')        # specify color by name
plt.plot(x, np.sin(x - 1), color='g')           # short color code (rgbcmyk)
plt.plot(x, np.sin(x - 2), color='0.75')        # Grayscale between 0 and 1
plt.plot(x, np.sin(x - 3), color='#FFDD44')     # Hex code (RRGGBB from 00 to FF)
plt.plot(x, np.sin(x - 4), color=(1.0,0.2,0.3)) # RGB tuple, values 0 to 1
plt.plot(x, np.sin(x - 5), color='chartreuse'); # all HTML color names supported

Line style and color codes can be combined into a single format string

In [None]:
plt.plot(x, x + 0, '-g')  # solid green
plt.plot(x, x + 1, '--c') # dashed cyan
plt.plot(x, x + 2, '-.k') # dashdot black
plt.plot(x, x + 3, ':r');  # dotted red

In [None]:
# set xlim and ylim and a grid

plt.plot(x, y1)

plt.xlim(-10, 11)
plt.ylim(-10, 10);
plt.grid(True)


Another useful method is ``plt.axis()`` (be careful about possible confusion between *axes* with an *e*, and *axis* with an *i*).
The ``plt.axis()`` function allows you to set the ``x`` and ``y`` limits with a single call, passing a list specifying ``[xmin, xmax, ymin, ymax]``:

In [None]:
plt.plot(x, y1)
plt.axis([-1, 11, -1.5, 1.5]);

Let's see how to plot information on Cartesian axes

In [None]:
plt.plot(x, y1)
plt.title("Spring motion")
plt.xlabel("Time (s)")
plt.ylabel("Space(m)");

When multiple lines are displayed, it may be helpful to create a chart legend that labels each line type.
Again, Matplotlib has a built-in way to quickly create such a legend.
It is done via the ``plt.legend()`` method.
While there are several valid ways to use it, I find it easiest to specify the label of each line using the plot function's ``label`` keyword:

In [None]:
plt.plot(x, np.sin(x), '-g', label='sin(x)')
plt.plot(x, np.cos(x), ':b', label='cos(x)')

#plt.legend();
plt.legend(loc='lower right', fontsize=8);

# SOME OTHER EXAMPLE

In [None]:
x = np.linspace(0,15,30)
y = np.sin(x) + 0.1*np.random.randn(len(x))

In [None]:
# lw and ms parameters are used to control the line width and marker size, respectively, in plots.

plt.plot(x,y, 'o--', color='purple', lw=2, ms=8)

Figure size

In [None]:
# resize image
plt.figure(figsize=(11,10))
plt.plot(x,y)

In [None]:
plt.figure(figsize=(8,3))
plt.plot(x,y, label='Component 1')
plt.xlabel('Time [s]', fontsize=16)
plt.ylabel('Voltage [V]')
plt.title('Voltage in 3rd Electrode')
plt.legend(loc='lower right', fontsize=12)
plt.show()

# Exercise 1: Plot a Sine Wave

### Task:
1. Import `matplotlib.pyplot` and `numpy`.
2. Create a plot of the sine function $y = \sin(x)$ for $x$ values ranging from $-2\pi$ to $2\pi$.
3. Label the axes and give the plot a title.
4. Add a grid to the plot.

# Exercise 2: Plot a Parabola

### Task:
1. Plot the quadratic function $y = x^2$ for $x$ values ranging from $-10$ to $10$.
2. Use a dashed red line for the plot.
3. Add a legend with the label "y = x^2".
4. Customize the x-axis and y-axis limits to display the plot properly.
5. Add a title and axis labels.


# Let's try to use Matplotlib with pandas to plot some data
We load a file with the export of commodities

In [None]:
import pandas as pd

In [None]:
url_value = 'https://raw.githubusercontent.com/pal-dev-labs/Python-for-Economic-Applications-2024-2025/refs/heads/main/Data/Europe_Value.csv'
export_value=pd.read_csv(url_value)

In [None]:
export_value

In [None]:
export_value.info()

I want to plot export of crustaceans and fishes for Italy

In [None]:
export_value=export_value.dropna(axis=1)

In [None]:
export_value

In [None]:
export_value[ export_value['Land Area']=='Italy']

In [None]:
filter_italy=export_value['Land Area']=='Italy'
export_italy=export_value[filter_italy]
export_italy

We can transform the values into an ndarray (NumPy object) with the **values** attribute

In [None]:

export_italy.iloc[1,3:].values

In [None]:
exp_fish = export_italy.iloc[1,3:].values
exp_crustaceans=export_italy.iloc[0,3:].values

In [None]:
exp_crustaceans

In [None]:
# Draw both Italian export series on a square figure. No x values are given,
# so each point is placed at its position in the array (0, 1, 2, ...).
plt.figure(figsize=(6, 6))
for series, name in ((exp_fish, 'Fish'), (exp_crustaceans, 'Crustaceans')):
    plt.plot(series, marker='o', linestyle='-', label=name)
plt.title('Italy exports')
plt.xlabel('Year')
plt.ylabel('Value (Thousand Euro)')
plt.legend()

How do I fix the x-axis? I create an array of years ad hoc

In [None]:
exp_fish.shape[0]

In [None]:
num_value = exp_fish.shape[0]
years = np.arange(2000,2000+num_value)

In [None]:
years

In [None]:
# Same two series as in the previous chart, now with the years on the x axis.
# NOTE(review): the earlier plot of these series labels the y axis
# 'Thousand Euro' while this one says 'Millions Euro' - confirm the unit.
for series, name in ((exp_fish, 'Fish'), (exp_crustaceans, 'Crustaceans')):
    plt.plot(years, series, marker='o', linestyle='-', label=name)
plt.title('Italy exports')
plt.xlabel('Year')
plt.ylabel('Value (Millions Euro)')
plt.legend()

# Exercise
Plot the exports of fish and shellfish for France and Spain

# Now I want to plot the export of fish for all EU countries

In [None]:
export_value.head(10)

First, I create a filter for **Fish** in Commodities **AND** **Export** in Trade Flow

In [None]:
# Keep only the rows describing exports of fish: both conditions must hold,
# so the two boolean masks are combined element-wise with the & operator.
fish_filter = export_value['Commodity'] == 'Fish'
trade_filter = export_value['Trade flow'] == 'Export'
tot_filter = fish_filter & trade_filter

# .copy() makes export_value_fish an independent DataFrame, so the column
# added to it later does not raise pandas' SettingWithCopyWarning.
export_value_fish = export_value[tot_filter].copy()

In [None]:
# info is a method: without the parentheses the notebook only shows the
# bound-method object instead of printing the column/dtype summary.
export_value_fish.info()

I have many countries. I want to plot the **top 10 exporters**

How do I select them? I have to identify a criterion.  
I first consider the average export between 2000 and 2015 for each country and then select the **top 10 countries with the highest average export**


Let's consider the export of each country. Each row is a country and each column holds the level of export for one year

In [None]:
export_value_fish.iloc[:, 3:]

We can use the method **mean(axis=1)** to calculate the average.  
axis=1 means that, for each row, the average is taken across all its columns  
axis=0 means that, for each column, the average is taken across all its rows

In [None]:
export_averages = export_value_fish.iloc[:, 3:].mean(axis=1)
export_averages

Now we can **add a column** with the averages to our dataframe

In [None]:
export_value_fish['Mean']=export_averages

In [None]:
export_value_fish

Now I want to extract the top 10 exporters  
I start by sorting the rows

In [None]:
exp_sorted = export_value_fish.sort_values('Mean', ascending=False)

In [None]:
exp_sorted[['Land Area','Mean']]

We extract the first 10 rows

In [None]:
exp_sor_final = exp_sorted.iloc[0:10,:]
exp_sor_final[['Land Area','Mean']]

How do I plot them? Let's see the first one

In [None]:
# Yearly values start at column 3; take exactly as many columns as there are
# years so the slice cannot drift out of step with the x axis.
plt.plot(years, exp_sor_final.iloc[0, 3:3 + len(years)])

I want to plot them all

In [None]:
# I create an ndarray with the name of the countries
countries = exp_sor_final['Land Area'].values
countries

In [None]:
# One line per country. Row `index` of exp_sor_final belongs to
# countries[index], because countries was read from that same frame.
# Yearly values start at column 3 and there is one column per entry of years.
year_cols = slice(3, 3 + len(years))
for index, country in enumerate(countries):
    plt.plot(years, exp_sor_final.iloc[index, year_cols])

Let's be a little nicer

In [None]:
plt.figure(figsize=(11, 8))

# One labelled line per country. Row `index` of exp_sor_final belongs to
# countries[index], because countries was read from that same frame.
# Yearly values start at column 3 and there is one column per entry of years.
year_cols = slice(3, 3 + len(years))
for index, country in enumerate(countries):
    plt.plot(years, exp_sor_final.iloc[index, year_cols], 'o-', label=country)

plt.title('Top 10 European exports')
plt.xlabel('Year')
plt.ylabel('Value (Millions Euro)')
plt.legend()

# SCATTER PLOT

Another commonly used type of graph is the SCATTER PLOT. Instead of points joined by line segments, here the points are represented individually with a dot, circle, or other shape.

# SCATTER PLOT WITH ``plt.plot``

In [None]:
x = np.linspace(0, 10, 30)
y = np.sin(x)

plt.plot(x, y, 'o', color='black');

Let's see the possible markers

In [None]:
# Fixed seed, so the random marker positions are the same on every run.
rng = np.random.RandomState(0)
marker_codes = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']
for marker in marker_codes:
    # Five random points per marker; x is drawn before y.
    xs = rng.rand(5)
    ys = rng.rand(5)
    plt.plot(xs, ys, marker, label="marker='{0}'".format(marker))
plt.legend(numpoints=1)
# Extra room on the right of the data (which lies in [0, 1)) for the legend.
plt.xlim(0, 1.8);

More commands

In [None]:
plt.plot(x, y, '-p', color='gray',
         markersize=11, linewidth=4,
         markerfacecolor='white',
         markeredgecolor='blue',
         markeredgewidth=2)
plt.ylim(-1.2, 1.2);

## Scatter Plots with ``plt.scatter``

In [None]:
plt.scatter(x, y, marker='o');

The main difference between ``plt.scatter`` and ``plt.plot`` is that the former can be used to create scatter plots where the properties of each individual point (size, face color, edge color, etc.) can be individually controlled or mapped to data.

Let's see an example
To better see the overlaid results, we will also use the ``alpha`` keyword to adjust the transparency level:

In [None]:
rng.randn(100)

In [None]:
# Fixed seed, so the same random points are produced on every run.
rng = np.random.RandomState(0)
x = rng.randn(100) # 100 samples from the standard normal distribution (mean 0, std 1)
y = rng.randn(100)
colors = rng.rand(100) # 100 uniform random numbers in [0, 1)
sizes = 1000 * rng.rand(100) # 100 uniform random numbers in [0, 1000)

# c maps each value to a color of the colormap, s sets the size of each
# marker, and alpha=0.2 makes the markers translucent so overlaps stay visible.
plt.scatter(x, y, c=colors, s=sizes, alpha=0.2,
            cmap='viridis')
plt.colorbar();  # show color scale

Note that the color argument is automatically mapped to a color scale (shown here by the ``colorbar()`` command), and that the size argument is given in points squared (it sets the area of each marker, not its diameter).
In this way, the color and size of the dots can be used to convey information in the visualization, in order to display multidimensional data.

For example, we could use iris data from Scikit-Learn Module, where each sample is one of three types of flowers whose petal and sepal size has been carefully measured:

In [None]:
from sklearn.datasets import load_iris

iris = load_iris()
# Transposed so that features[k] is the k-th measurement for every sample.
features = iris.data.T

# Build a DataFrame of the measurements and attach the species name of each
# sample, translating the numeric class codes through iris.target_names.
code_to_species = dict(enumerate(iris.target_names))
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = pd.Series(iris.target).map(code_to_species)

df

In [None]:
df['species'].unique()

In [None]:
plt.scatter(features[0], features[1], alpha=0.4,
            s=100*features[3], c=iris.target, cmap='viridis')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1]);

# Three-Dimensional Plotting in Matplotlib

Matplotlib was initially designed with only two-dimensional plotting in mind.
Three-dimensional plots are enabled by importing the ``mplot3d`` toolkit, included with the main Matplotlib installation:

In [None]:
from mpl_toolkits import mplot3d

In [None]:
fig = plt.figure()
ax = plt.axes(projection='3d')

## Three-dimensional points and lines

The most basic three-dimensional graph is a line or a scatter plot created from sets of triples (x, y, z).
In analogy with the more common two-dimensional plots discussed above, these can be created using the ``ax.plot3D`` and ``ax.scatter3D`` functions.

Let's try drawing a trigonometric spiral, along with some points drawn randomly near the line:

In [None]:
plt.figure(figsize=(10,10))

ax = plt.axes(projection='3d')

# Data for a three-dimensional line
zline = np.linspace(0, 15, 1000)
xline = np.sin(zline)
yline = np.cos(zline)
ax.plot3D(xline, yline, zline, 'gray')

# Data for three-dimensional scattered points
zdata = 15 * np.random.random(100)
xdata = np.sin(zdata) + 0.1 * np.random.randn(100)
ydata = np.cos(zdata) + 0.1 * np.random.randn(100)
ax.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Greens');

Note that by default, scatter points have their transparency adjusted to give a sense of depth on the page.
While the three-dimensional effect is sometimes difficult to see within a static image, an interactive view can lead to some nice intuition about the arrangement of dots.

In [None]:
def f(x, y):
    """Return sin(r), where r = sqrt(x**2 + y**2).

    Works element-wise on NumPy arrays as well as on plain numbers.
    """
    radius = np.sqrt(x ** 2 + y ** 2)
    return np.sin(radius)

x = np.linspace(-6, 6, 30)
y = np.linspace(-6, 6, 30)

X, Y = np.meshgrid(x, y)
Z = f(X, Y)

In [None]:
# %matplotlib inline
# %matplotlib notebook


In [None]:
%matplotlib inline
fig = plt.figure()
#plt.figure(figsize=(10,10))
ax = plt.axes(projection='3d')
ax.contour3D(X, Y, Z, 50, cmap='viridis')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z');

Sometimes the default viewing angle is not optimal, in which case we can use the ``view_init`` method to set the elevation and azimuth angles. In the following example, we will use an elevation of 60 degrees (i.e. 60 degrees above the x-y plane) and an azimuth of 35 degrees (i.e. rotated 35 degrees counterclockwise about the z-axis):

# Density and Contour Plots

In [None]:
def f(x, y):
    """Return sin(x)**10 + cos(10 + y*x) * cos(x), element-wise for arrays."""
    power_term = np.sin(x) ** 10
    product_term = np.cos(10 + y * x) * np.cos(x)
    return power_term + product_term

In [None]:
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 40)

X, Y = np.meshgrid(x, y)
Z = f(X, Y)

In [None]:
plt.figure(figsize=(10,10))
ax = plt.axes(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                cmap='ocean', edgecolor='none')
ax.set_title('surface');

A contour plot can be created with the ``plt.contour`` function.
It takes three arguments: a grid of *x* values, a grid of *y* values, and a grid of *z* values.
The *x* and *y* values represent the positions on the graph and the *z* values will be represented by the contour levels.
The simplest way to prepare such data is to use the ``np.meshgrid`` function, which constructs two-dimensional grids from one-dimensional arrays:

In [None]:
plt.contour(X, Y, Z, colors='black');

In [None]:
# 20 contour levels, colored from red (low values) to gray/black (high values).
plt.contour(X, Y, Z, 20, cmap='RdGy')
# Draw the color scale so the colors can be read back as z values.
plt.colorbar();

The color bar makes it clear that the black regions are "peaks", while the red regions are "valleys".

# INTRODUCTION TO STATISTICAL TOOLS

In [None]:
url = 'https://raw.githubusercontent.com/pal-dev-labs/Python-for-Economic-Applications-2024-2025/refs/heads/main/Data/Advertising.csv'
advertising = pd.read_csv(url, usecols=[1,2,3,4])
advertising.info()

In [None]:
advertising

The **Advertising data set** consists of the **sales** of that product in 200 different markets, along with **advertising budgets** for the product in each of those markets for three different media: **TV, radio, and newspaper**.  

We would like to have a prediction for **Sales** based on the 3 predictors **TV, Radio, and Newspaper** budgets

Let's try to have an understanding of the potential relations between Sales and predictors

In [None]:
plt.figure(figsize=(6,4))
plt.scatter(advertising['TV'].values, advertising['Sales'].values)
plt.xlabel("TV (x1000€)")
plt.ylabel("Sales (x1000 Unit)")
plt.title('Sales vs investment in TV advertising')

In [None]:
plt.figure(figsize=(6,4))
plt.scatter(advertising['Radio'].values, advertising['Sales'].values)
plt.xlabel("Radio (x1000€)")
plt.ylabel("Sales (x1000 Unit)")
plt.title('Sales vs investment in Radio advertising')

In [None]:
plt.figure(figsize=(6,4))
plt.scatter(advertising['Newspaper'].values, advertising['Sales'].values)
plt.xlabel("Newspaper (x1000€)")
plt.ylabel("Sales (x1000 Unit)")
plt.title('Sales vs investment in Newspaper advertising')

## We can also use Seaborn Library
Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics.

In [None]:
import seaborn as sns
# Seaborn is a Python data visualization library based on matplotlib. 
# It provides a high-level interface for drawing attractive and informative statistical graphics.
sns.set_theme()

In [None]:

# let's have a look a the data first...
sns.pairplot(advertising, height=1.5);

We would like to use a **Statistical Learning Algorithm** to identify a nice predictor.

This means that, if we denote Sales by the variable $Y$ and the predictors by the variable $X$,
technically speaking we are looking for a function $h: X \to Y$.

We can start with the function
$h(x) = y = w_0 + w_1 x$

Our model is therefore a LINEAR REGRESSION model with parameters $w_0, w_1$

Let's identify the TRAINING SET

In [None]:
# reshape(-1, 1) turns each 1-D vector into a 2-D array with a single column
# (one row per market); scikit-learn requires the predictor matrix to be 2-D.
x_train = advertising['TV'].values.reshape(-1, 1)     # predictor
y_train = advertising['Sales'].values.reshape(-1, 1)  # target

In [None]:
y_train[0:10]

In [None]:
print(x_train.shape, y_train.shape)

Let's import the **LinearRegression** class from the **sklearn** module

In [None]:
from sklearn.linear_model import LinearRegression


Create the model

In [None]:
model = LinearRegression()

Training phase

In [None]:
model.fit(x_train, y_train)

Let's print the values of the coefficients

In [None]:
w0 = model.intercept_[0]
w1 = model.coef_[0][0]

print('Il valore di w0 è: ',w0)
print('Il valore di w1 è: ',w1)


In [None]:
def h(x, w0, w1):
    """Evaluate the linear hypothesis h(x) = w0 + w1 * x.

    x may be a number or a NumPy array; w0 is the intercept, w1 the slope.
    """
    slope_term = w1 * x
    return w0 + slope_term

In [None]:
# Values of the fitted line at the two ends of the plotted TV range (0 and 300).
# The array p2 that holds them is only created in the next cell, so running
# the notebook top to bottom would fail here; compute the values directly.
h(np.array([0, 300]), w0, w1)

In [None]:
# Optional: call plt.figure(figsize=(8,5)) first for a larger picture.

# Scatter of the training data.
tv_budget = advertising['TV'].values
sales = advertising['Sales'].values
plt.scatter(tv_budget, sales)
plt.xlabel("TV (x1000€)")
plt.ylabel("Sales (x1000 Unit)")
plt.title('Sales vs investment in TV advertising')

# Two points are enough to draw the fitted straight line: its values at the
# x coordinates x0 and x1.
x0, x1 = 0, 300
p1 = np.array([x0, x1])  # x coordinates of the two end points
p2 = h(p1, w0, w1)       # matching y coordinates h(x0), h(x1)

line_label = 'h(x)='+str(w0)+' +'+str(w1)+'x'
plt.plot(p1, p2, '-r', lw=2, label=line_label)
plt.legend();

Let's try to make some **PREDICTIONS**

How many sales can I make if I invest €134,000 in TV advertising? And if I invest €230,000?

In [None]:
xp = np.array([134,230]).reshape(-1,1)
yp = model.predict(xp)

In [None]:
yp

In [None]:
plt.figure(figsize=(8, 5))

# Scatter of the training data.
tv_budget = advertising['TV'].values
sales = advertising['Sales'].values
plt.scatter(tv_budget, sales)
plt.xlabel("TV (x1000€)")
plt.ylabel("Sales (x1000 Unit)")
plt.title('Sales vs investment in TV advertising')

# Two points are enough to draw the fitted straight line: its values at the
# x coordinates x0 and x1.
x0, x1 = 0, 300
p1 = np.array([x0, x1])  # x coordinates of the two end points
p2 = h(p1, w0, w1)       # matching y coordinates h(x0), h(x1)

line_label = 'h(x)='+str(w0)+' +'+str(w1)+'x'
plt.plot(p1, p2, '-r', lw=2, label=line_label)

# Mark the predicted points (xp, yp) with large purple dots.
plt.plot(xp, yp, 'o', color='purple', ms=11)

plt.legend();


# NOW YOU TRY.
- How many units can I sell with an investment of €20,000 in radio?
- How many units can I sell with an investment of €62,000 in newspaper?