## Importing Libraries

In [3]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Importing Data

In [26]:
# Load the four raw series in one pass; the unpacking order fixes which
# name receives which file.
df1, df2, df3, df4 = (
    pd.read_csv(csv_name)
    for csv_name in (
        "Consumer Price Index.csv",                        # -> df1
        "Gross Domestic Product by Expenditure.csv",       # -> df2
        "Total Manufacturing Production for India .csv",   # -> df3
        "Exports of goods and services.csv",               # -> df4
    )
)

#### Consumer Price Index

In [27]:
# Preview the first five rows of the Consumer Price Index frame.
df1.head(n=5)

Unnamed: 0,DATE,CPALTT01INM659N
0,1960-01-01,4.298643
1,1960-02-01,3.363229
2,1960-03-01,3.393665
3,1960-04-01,4.298643
4,1960-05-01,3.340757


#### Gross Domestic Product by Expenditure

In [28]:
# Preview the first five rows of the GDP-by-expenditure frame.
df2.head(n=5)

Unnamed: 0,DATE,NAEXKP07INQ652S
0,1996-04-01,770200100000.0
1,1996-07-01,917760700000.0
2,1996-10-01,908462600000.0
3,1997-01-01,942204700000.0
4,1997-04-01,915694400000.0


#### Total Manufacturing Production for India

In [29]:
# Preview the first five rows of the manufacturing production frame.
df3.head(n=5)

Unnamed: 0,DATE,PRMNTO01INQ657S
0,1994-07-01,3.226083
1,1994-10-01,3.335921
2,1995-01-01,3.577195
3,1995-04-01,1.932583
4,1995-07-01,5.203285


#### Exports of goods and services

In [30]:
# Preview the first five rows of the exports frame.
df4.head(n=5)

Unnamed: 0,DATE,INDEXPORTQDSMEI
0,1996-04-01,362009200000.0
1,1996-07-01,361455100000.0
2,1996-10-01,362368500000.0
3,1997-01-01,373417800000.0
4,1997-04-01,406519900000.0


Keeping only observations dated after 1996-04-01 so that all datasets cover the same period.

In [31]:
# Keep only the rows dated strictly after 1996-04-01 in every frame.
# DATE has not been converted with pd.to_datetime yet at this point, so the
# comparison is made against the raw column values as loaded from the CSVs.
#
# .copy() makes each result an independent frame instead of a slice of the
# raw data, so assigning to its columns afterwards is a plain column write
# rather than a chained assignment (SettingWithCopyWarning).
#
# NOTE(review): the strict `>` also drops the 1996-04-01 row itself, so the
# monthly CPI frame presumably starts at 1996-05-01 afterwards — confirm
# this is intended before relying on the first quarterly CPI average.
df1 = df1[df1.DATE > '1996-04-01'].copy()
df2 = df2[df2.DATE > '1996-04-01'].copy()
df3 = df3[df3.DATE > '1996-04-01'].copy()
df4 = df4[df4.DATE > '1996-04-01'].copy()

Converting DATE to datetime object

In [32]:
# Replace the DATE column of each frame with its pandas datetime equivalent.
for frame in (df1, df2, df3, df4):
    frame['DATE'] = pd.to_datetime(frame['DATE'])

Converting Date to Index

In [33]:
# Move DATE out of the columns and use it as the row index of each frame.
df1 = df1.set_index('DATE')
df2 = df2.set_index('DATE')
df3 = df3.set_index('DATE')
df4 = df4.set_index('DATE')

Converting the monthly Consumer Price Index data to quarterly averages

In [34]:
# Average df1 over calendar quarters, then stamp each average one day after
# its quarter end so the labels fall on the first day of a quarter.
#
# The `loffset=` argument of resample() was deprecated in pandas 1.1 and
# removed in pandas 2.0; shifting the resulting index explicitly is the
# documented replacement and yields the same labels on every version.
# The quarter-end offset object is what the 'Q' alias resolves to; the bare
# alias is itself deprecated since pandas 2.2.
#
# NOTE(review): with this labelling the row stamped e.g. 1996-07-01 holds
# the mean of the quarter that ENDED on 1996-06-30 — confirm that this is
# the intended alignment against the other quarterly series.
df1 = df1.resample(
    pd.offsets.QuarterEnd(startingMonth=12),
    closed='right',
    label='right',
).mean()
df1.index = df1.index + pd.DateOffset(days=1)

Creating new DataFrame

In [36]:
# Quarter-start timestamps from 1996-07-01 through 2023-01-01, inclusive.
dates = pd.date_range('1996-07-01', '2023-01-01', freq='QS')

In [37]:
# Empty frame indexed by the quarterly timestamps; columns are added next.
data = pd.DataFrame(index=dates)

In [38]:
# Take the last column of each source frame and store it under a readable
# name; assignment aligns each series to `data`'s index by label.
column_sources = {
    'GDP': df2,
    'Exports': df4,
    'Manufacturing': df3,
    'CPI': df1,
}
for column_name, source_frame in column_sources.items():
    data[column_name] = source_frame.iloc[:, -1]

In [39]:
# Preview the first five rows of the combined quarterly frame.
data.head(n=5)

Unnamed: 0,GDP,Exports,Manufacturing,CPI
1996-07-01,917760700000.0,361455100000.0,-0.590751,9.078431
1996-10-01,908462600000.0,362368500000.0,0.197451,8.570983
1997-01-01,942204700000.0,373417800000.0,2.288496,9.198929
1997-04-01,915694400000.0,406519900000.0,2.771743,10.633984
1997-07-01,901406200000.0,404048500000.0,1.762883,7.727646


In [40]:
# Count missing values per column (isna is the same method as isnull).
data.isna().sum()

GDP              0
Exports          0
Manufacturing    0
CPI              0
dtype: int64

No null values in the final data

In [41]:
# (number of rows, number of columns) of the combined frame.
data.shape

(107, 4)

The data has 107 data points, one for every quarter from July 1996 to January 2023, and 4 features:
- Gross Domestic Product
- Exports of goods and services in India
- Manufacturing Production for India
- Consumer Price Index

Saving the data to a separate CSV file.

In [42]:
# Write the combined frame (index included) to a CSV in the working directory.
output_csv = "recession_data.csv"
data.to_csv(output_csv)