# Índice Nacional de Precios al Consumidor / Índice Nacional de Precios al Productor

- Martínez Ostoa Néstor Iván
- Minería de Datos

## Objetivo
- Realizar una regresión lineal entre los dos índices

In [10]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [7]:
# Load the price-index dataset from 'IPCyIPP.csv' (path is relative to the working directory).
df = pd.read_csv('IPCyIPP.csv')

In [4]:
def basic_info(df):
    """Print a quick textual overview of a DataFrame.

    The report contains, in order: the dimensions, the column names, the
    number of missing values (total and per column) and the output of
    ``df.info()``. Nothing is returned.
    """
    # Count the missing values once and reuse the result for both figures.
    missing_by_column = df.isna().sum()
    total_missing = missing_by_column.sum()
    row_count, column_count = df.shape[0], df.shape[1]

    sections = [
        '-------------BASIC DATAFRAME INFO-------------',
        f'Dataframe dimensions: {row_count} x {column_count}\n',
        f'Dataframe columns: {np.array(df.columns)}\n',
        f'Distribution of missing values ({total_missing}):\n-------------\n{missing_by_column}\n',
        f'Dataframe info:\n-------------',
    ]
    for section in sections:
        print(section)

    # info() writes its own report straight to standard output.
    df.info()

In [5]:
basic_info(df)

-------------BASIC DATAFRAME INFO-------------
Dataframe dimensions: 43 x 3

Dataframe columns: ['FECHA' 'IPC' 'IPP']

Distribution of missing values (0):
-------------
FECHA    0
IPC      0
IPP      0
dtype: int64

Dataframe info:
-------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   FECHA   43 non-null     object 
 1   IPC     43 non-null     float64
 2   IPP     43 non-null     float64
dtypes: float64(2), object(1)
memory usage: 1.1+ KB


In [8]:
df.describe()

Unnamed: 0,IPC,IPP
count,43.0,43.0
mean,105.192628,96.863976
std,4.23624,4.862853
min,98.795,88.109936
25%,102.6615,93.351076
50%,104.503,96.570414
75%,107.9905,99.193289
max,113.682,108.537444


In [6]:
df.head()

Unnamed: 0,FECHA,IPC,IPP
0,2018/01,98.795,93.54535
1,2018/02,99.171374,94.470017
2,2018/03,99.492157,94.902203
3,2018/04,99.154847,93.401625
4,2018/05,98.99408,94.641246


In [9]:
# FECHA is read as 'YYYY/MM' text (e.g. '2018/01'). Passing the format
# explicitly avoids depending on pandas' format inference; each month is
# mapped to its first day.
df['FECHA'] = pd.to_datetime(df['FECHA'], format='%Y/%m')
df.head()

Unnamed: 0,FECHA,IPC,IPP
0,2018-01-01,98.795,93.54535
1,2018-02-01,99.171374,94.470017
2,2018-03-01,99.492157,94.902203
3,2018-04-01,99.154847,93.401625
4,2018-05-01,98.99408,94.641246


## Visualización de los datos

In [51]:
def gen_trace(df, x_col, y_col, color):
    """Build a Plotly scatter trace for one column of ``df``.

    Args:
        df: DataFrame holding the data.
        x_col: Name of the column plotted on the x axis.
        y_col: Name of the column plotted on the y axis; also used as the
            trace name.
        color: Marker color of the trace.

    Returns:
        A ``go.Scatter`` drawn with both markers and lines.
    """
    scatter_options = dict(
        x=df[x_col],
        y=df[y_col],
        marker_color=color,
        mode='markers+lines',
        name=y_col,
    )
    return go.Scatter(**scatter_options)

def graph_series(df, x_col, y_cols, title, colors):
    """Plot one or more columns of ``df`` against ``x_col`` and show the figure.

    Args:
        df: DataFrame holding the data to plot.
        x_col: Name of the column used for the x axis; also the x-axis title.
        y_cols: Sequence of column names; one trace is added per entry.
        title: Figure title.
        colors: Sequence of marker colors, indexed in parallel with ``y_cols``.

    The y axis is titled with the column name only when exactly one series
    is plotted.
    """
    fig = go.Figure()
    for idx, y_col in enumerate(y_cols):
        fig.add_trace(
            gen_trace(df, x_col, y_col, colors[idx])
        )
    fig.update_layout(
        title=dict(text=f'{title}'), template='plotly_dark', width=700
    )
    fig.update_xaxes(title=dict(text=f'{x_col}'))
    # Check the number of series, not the length of the last column name:
    # the latter is never 1 for names such as 'IPC', and the loop variable
    # does not exist at all when y_cols is empty.
    if len(y_cols) == 1:
        fig.update_yaxes(title=dict(text=f'{y_cols[0]}'))
    fig.show()



In [52]:
graph_series(df, 'FECHA', ['IPC'], 'Índice Nacional de Precios al Consumidor', colors=['gold'])

In [53]:
graph_series(df, 'FECHA', ['IPP'], 'Índice Nacional de Precios al Productor', colors=['red'])

In [54]:
# Keep each series in the same color as in its individual chart: IPC in gold, IPP in red.
graph_series(df, 'FECHA', ['IPC', 'IPP'], 'INPC vs INPP', colors=['gold', 'red'])