In [14]:
import pandas as pd
# Load the finance/economics dataset from the Colab session storage.
df = pd.read_csv(filepath_or_buffer="/content/finance_economics_dataset (2).csv")
# Preview the first ten rows as plain text.
print(df.head(n=10))

         Date Stock Index  Open Price  Close Price  Daily High  Daily Low  \
0  2000-01-01   Dow Jones     2128.75      2138.48     2143.70    2100.55   
1  2000-01-02     S&P 500     2046.82      2036.18     2082.83    2009.53   
2  2000-01-03   Dow Jones     1987.92      1985.26     2022.28    1978.37   
3  2000-01-04   Dow Jones     4625.02      4660.47     4665.26    4595.46   
4  2000-01-05     S&P 500     1998.18      1982.18     2044.31    1966.44   
5  2000-01-06     S&P 500     2087.80      2124.76     2153.18    2085.18   
6  2000-01-07   Dow Jones     4037.59      3996.40     4055.78    3948.97   
7  2000-01-08      NASDAQ     2798.96      2826.64     2864.47    2766.89   
8  2000-01-09   Dow Jones     4106.84      4141.59     4154.46    4062.99   
9  2000-01-10   Dow Jones     1261.46      1229.64     1296.14    1195.84   

   Trading Volume  GDP Growth (%)  Inflation Rate (%)  Unemployment Rate (%)  \
0         2670411           -0.37                6.06                   

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): this cell has no execution count, and the CSV is read from
# /content rather than from the mounted drive — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

**Data Cleaning**
**Step #1: Check for null values**

In [2]:

# Per-column count of missing values (isna is the same check as isnull).
print(df.isna().sum())

Date                                     0
Stock Index                              0
Open Price                               0
Close Price                              0
Daily High                               0
Daily Low                                0
Trading Volume                           0
GDP Growth (%)                           0
Inflation Rate (%)                       0
Unemployment Rate (%)                    0
Interest Rate (%)                        0
Consumer Confidence Index                0
Government Debt (Billion USD)            0
Corporate Profits (Billion USD)          0
Forex USD/EUR                            0
Forex USD/JPY                            0
Crude Oil Price (USD per Barrel)         0
Gold Price (USD per Ounce)               0
Real Estate Index                        0
Retail Sales (Billion USD)               0
Bankruptcy Rate (%)                      0
Mergers & Acquisitions Deals             0
Venture Capital Funding (Billion USD)    0
Consumer Sp

**There are no null values, so no removal or imputation step is needed.**

**Step #2: Check for duplicate rows & remove them**

In [7]:
# Count rows that are exact duplicates of an earlier row (all columns compared).
print(df.duplicated().sum())

0


**There are no duplicate rows, so nothing needs to be removed.**

# New section

**Step#3: Check & fix Datatypes**

In [17]:
# Inspect the dtype pandas inferred for each column when the CSV was read.
print(df.dtypes)

Date                                      object
Stock Index                               object
Open Price                               float64
Close Price                              float64
Daily High                               float64
Daily Low                                float64
Trading Volume                             int64
GDP Growth (%)                           float64
Inflation Rate (%)                       float64
Unemployment Rate (%)                    float64
Interest Rate (%)                        float64
Consumer Confidence Index                  int64
Government Debt (Billion USD)              int64
Corporate Profits (Billion USD)            int64
Forex USD/EUR                            float64
Forex USD/JPY                            float64
Crude Oil Price (USD per Barrel)         float64
Gold Price (USD per Ounce)               float64
Real Estate Index                        float64
Retail Sales (Billion USD)                 int64
Bankruptcy Rate (%) 

In [18]:
# Index-name labels -> unordered categorical dtype.
df["Stock Index"] = df["Stock Index"].astype(pd.CategoricalDtype())
# Date strings -> datetime64 so the column supports date-based operations.
df["Date"] = pd.to_datetime(df["Date"])

In [19]:
# Re-check the dtypes to confirm the Date and Stock Index conversions took effect.
print(df.dtypes)

Date                                     datetime64[ns]
Stock Index                                    category
Open Price                                      float64
Close Price                                     float64
Daily High                                      float64
Daily Low                                       float64
Trading Volume                                    int64
GDP Growth (%)                                  float64
Inflation Rate (%)                              float64
Unemployment Rate (%)                           float64
Interest Rate (%)                               float64
Consumer Confidence Index                         int64
Government Debt (Billion USD)                     int64
Corporate Profits (Billion USD)                   int64
Forex USD/EUR                                   float64
Forex USD/JPY                                   float64
Crude Oil Price (USD per Barrel)                float64
Gold Price (USD per Ounce)                      

**Step #4: Standardize & clean text data**

In [20]:
# Remove stray leading/trailing whitespace from the index labels so that
# variants such as "NASDAQ " and "NASDAQ" collapse into a single value.
df['Stock Index'] = df['Stock Index'].str.strip()

# Optional: convert cleaned column to category for efficiency
df['Stock Index'] = df['Stock Index'].astype('category')

# Apply the same whitespace cleanup to every remaining text (object) column.
# Previously this list was computed but never used, so no other column was cleaned.
text_columns = df.select_dtypes(include='object').columns.tolist()
for text_col in text_columns:
    # Only touch real strings: `.str.strip()` would turn non-string entries of
    # a mixed-type object column into NaN, silently destroying data.
    df[text_col] = df[text_col].map(
        lambda value: value.strip() if isinstance(value, str) else value
    )



**Step#5: Handling Outliers**

**Using the IQR method to check for outliers**

In [21]:
# Select only numerical columns (int64/float64 dtypes) as input for the
# outlier scan; other dtypes are excluded by this filter.
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns

# Count IQR-rule outliers column by column
def detect_outliers_iqr(df, columns):
    """Count, per column, the values lying outside the 1.5 * IQR fences.

    Args:
        df: DataFrame holding the data to scan.
        columns: Iterable of column labels in ``df`` to check; each column
            must support ``quantile`` and ordering comparisons.

    Returns:
        dict mapping each column label to the number of rows whose value is
        strictly below ``Q1 - 1.5 * IQR`` or strictly above ``Q3 + 1.5 * IQR``.
    """

    def _count_outside_fences(series):
        # Quartiles and the interquartile spread of this column.
        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)
        spread = q3 - q1

        low_fence = q1 - 1.5 * spread
        high_fence = q3 + 1.5 * spread

        # Strict comparisons: a value exactly on a fence is not an outlier.
        flagged = (series < low_fence) | (series > high_fence)
        return int(flagged.sum())

    return {name: _count_outside_fences(df[name]) for name in columns}

# Detect outliers for all numerical columns
# The result is a dict mapping each column name to its outlier count.
outlier_counts = detect_outliers_iqr(df, numerical_cols)
print(outlier_counts)  # Shows number of outliers in each column


{'Open Price': 0, 'Close Price': 0, 'Daily High': 0, 'Daily Low': 0, 'Trading Volume': 0, 'GDP Growth (%)': 0, 'Inflation Rate (%)': 0, 'Unemployment Rate (%)': 0, 'Interest Rate (%)': 0, 'Consumer Confidence Index': 0, 'Government Debt (Billion USD)': 0, 'Corporate Profits (Billion USD)': 0, 'Forex USD/EUR': 0, 'Forex USD/JPY': 0, 'Crude Oil Price (USD per Barrel)': 0, 'Gold Price (USD per Ounce)': 0, 'Real Estate Index': 0, 'Retail Sales (Billion USD)': 0, 'Bankruptcy Rate (%)': 0, 'Mergers & Acquisitions Deals': 0, 'Venture Capital Funding (Billion USD)': 0, 'Consumer Spending (Billion USD)': 0}
