<h1> Pandas Data Type Inference and Conversion (Backend Task) </h1>

In [33]:
import pandas as pd

In [34]:
# Read the sample data from 'test.csv' (path is relative to the working directory)
# and show the resulting frame as the cell output.
df = pd.read_csv('test.csv')
df


Unnamed: 0,Name,Birthdate,Score,Grade
0,Alice,1999-12-01,90,A


In [35]:
# Print the per-column dtypes of the frame as loaded, before any conversion is applied.
print(df.dtypes)

Name         object
Birthdate    object
Score         int64
Grade        object
dtype: object


In [36]:
def infer_and_convert_data_types(df, category_threshold=0.5):
    """Infer a more specific dtype for each untyped column of ``df``.

    Each object/string column is tried, in order, as:

    1. numeric   - only if *every* non-null value parses as a number;
    2. datetime  - only if ``pd.to_datetime`` accepts the whole column;
    3. category  - if the ratio of unique values to rows is below
       ``category_threshold``.

    Columns that already carry a specific dtype (numeric, datetime, bool,
    category, ...) are left untouched, so calling the function again on its
    own output is a no-op. Columns with no non-null values are also left
    untouched because they carry no information to infer a type from.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to convert. It is modified in place.
    category_threshold : float, default 0.5
        A column becomes categorical when ``n_unique / n_rows`` is strictly
        below this value.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with converted columns.
    """
    for col in df.columns:
        series = df[col]

        # Only untyped text/object columns need inference. Re-inferring typed
        # columns is harmful: pd.to_numeric turns datetime64 values into
        # integer nanoseconds, which would corrupt dates on a second run.
        if not (
            pd.api.types.is_object_dtype(series.dtype)
            or pd.api.types.is_string_dtype(series.dtype)
        ):
            continue

        present_count = series.notna().sum()
        if present_count == 0:
            # Empty or all-null column: nothing to infer from.
            continue

        # Numeric: accept only a lossless conversion. If coercion produced
        # extra NaNs, some value was not a number and the column is skipped
        # rather than having its non-numeric entries silently discarded.
        as_numeric = pd.to_numeric(series, errors='coerce')
        if as_numeric.notna().sum() == present_count:
            df[col] = as_numeric
            continue

        # Datetime: to_datetime raises if any value cannot be parsed.
        try:
            df[col] = pd.to_datetime(series)
            continue
        except (ValueError, TypeError):
            pass

        # Categorical: low-cardinality columns (NaN counts as one value).
        if len(series.unique()) / len(series) < category_threshold:
            df[col] = pd.Categorical(series)

    return df

# Run the inference pass and rebind `df` to its result.
df = infer_and_convert_data_types(df)

# Report the resulting dtypes under a heading, preceded by a blank line.
print("\nData types after inference:", df.dtypes, sep="\n")


Data types after inference:
Name                 object
Birthdate    datetime64[ns]
Score                 int64
Grade                object
dtype: object


In [10]:
# Display the current frame.
# NOTE(review): this cell's execution count (10) is out of sequence with the cells
# above, and the output below has five rows where the earlier display had one —
# it appears to come from a different run or input file. Re-run the notebook with
# Restart & Run All to confirm.
df

Unnamed: 0,Name,Birthdate,Score,Grade
0,Alice,1990-01-01,90.0,A
1,Bob,1991-02-02,75.0,B
2,Charlie,1992-03-03,85.0,A
3,David,1993-04-04,70.0,B
4,Eve,1994-05-05,,A


In [68]:
# Count missing values in each column.
# NOTE(review): execution count (68) is out of sequence with the other cells;
# confirm this output still matches a fresh top-to-bottom run.
df.isna().sum()

Name         0
Birthdate    0
Score        1
Grade        0
dtype: int64