# Índice Nacional de Precios al Consumidor / Índices Nacional de Precios al Productor

- Martínez Ostoa Néstor Iván
- Minería de Datos

## Objetivo
- Realizar una regresión lineal entre los dos índices

In [107]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [108]:
df = pd.read_csv('Datos.csv')

In [109]:
def basic_info(df):
    print('-------------BASIC DATAFRAME INFO-------------')
    print(f'Dataframe dimensions: {df.shape[0]} x {df.shape[1]}\n')
    print(f'Dataframe columns: {np.array(df.columns)}\n')
    print(f'Distribution of missing values ({df.isna().sum().sum()}):\n-------------\n{df.isna().sum()}\n')
    print(f'Dataframe info:\n-------------')
    df.info()

In [110]:
basic_info(df)

-------------BASIC DATAFRAME INFO-------------
Dataframe dimensions: 163 x 3

Dataframe columns: ['FECHA' 'IPP' 'IPC']

Distribution of missing values (0):
-------------
FECHA    0
IPP      0
IPC      0
dtype: int64

Dataframe info:
-------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163 entries, 0 to 162
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   FECHA   163 non-null    object 
 1   IPP     163 non-null    float64
 2   IPC     163 non-null    float64
dtypes: float64(2), object(1)
memory usage: 3.9+ KB


In [111]:
df.describe()

Unnamed: 0,IPP,IPC
count,163.0,163.0
mean,82.589273,87.12247
std,13.484782,13.374782
min,60.430941,65.350564
25%,71.704062,75.607008
50%,79.083765,86.069626
75%,94.718714,99.074464
max,110.016963,113.682


In [112]:
df.head()

Unnamed: 0,FECHA,IPP,IPC
0,2008-01-01,60.430941,65.350564
1,2008-02-01,61.036649,65.544834
2,2008-03-01,61.702103,66.019891
3,2008-04-01,62.164034,66.170127
4,2008-05-01,62.645395,66.098635


In [113]:
df['FECHA'] = pd.to_datetime(df['FECHA'])
df.head()

Unnamed: 0,FECHA,IPP,IPC
0,2008-01-01,60.430941,65.350564
1,2008-02-01,61.036649,65.544834
2,2008-03-01,61.702103,66.019891
3,2008-04-01,62.164034,66.170127
4,2008-05-01,62.645395,66.098635


## Visualización de los datos

In [114]:
def gen_trace(df, x_col, y_col, color):
    return go.Scatter(
        x=df[x_col], y=df[y_col], marker_color=color,mode='markers+lines',
        name=y_col
    )

def graph_series(df, x_col, y_cols, title, colors, dark=False):
    fig = go.Figure()
    for idx, y_col in enumerate(y_cols):
        fig.add_trace(
            gen_trace(df, x_col, y_col, colors[idx])
        )
    fig.update_layout(
        title=dict(text=f'{title}'), template='plotly_dark' if dark else 'plotly_white', width=700
    )
    fig.update_xaxes(title=dict(text=f'{x_col}'))
    if len(y_col) == 1: fig.update_yaxes(title=dict(text=f'{y_col}'))
    fig.show()



In [115]:
graph_series(df, 'FECHA', ['IPC'], 'Índice Nacional de Precios al Consumidor', colors=['gold'])

In [116]:
graph_series(df, 'FECHA', ['IPP'], 'Índice Nacional de Precios al Productor', colors=['red'])

In [117]:
graph_series(df, 'FECHA', ['IPP', 'IPC'], 'INPC vs INPP', colors=['red', 'gold'])

In [118]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['IPP'], y=df['IPC'], mode='markers', marker_color="turquoise"
))
fig.update_layout(
    template='plotly_white', width=700,
    title_text="Relación entre INPC e INPP"
)
fig.update_xaxes(title_text='INPP')
fig.update_yaxes(title_text='INPC')
fig.show()

## Regresión Lineal

In [120]:
from sklearn.linear_model import LinearRegression

In [125]:
X = df['IPC'].values.reshape(-1,1)
Y = df['IPP']
reg = LinearRegression().fit(X, Y)
print(f'Score: {reg.score(X, Y)}')
print(f'Intercept: {reg.intercept_}')
print(f'Coef: {reg.coef_[0]}')

Score: 0.9911713106091001
Intercept: -4.861118896463822
Coef: 1.0037639152986721


In [161]:
def beta(X, Y):
    Y_hat = np.mean(Y)
    X_hat = np.mean(X)
    return (Y_hat*np.sum(X) - np.sum(X.T@Y)) / (X_hat*np.sum(X) - np.sum(X.T@X))

def alpha(X, y, beta):
    y_hat = np.mean(y)
    x_hat = np.mean(X)
    return beta*x_hat - y_hat

In [162]:
b = beta(X, Y)
a = alpha(X, Y, b)
print(f'Intercept: {a}')
print(f'Coef: {b}')

Intercept: 4.861118896464106
Coef: 1.003763915298675
