In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# Small demo frame with one column per dtype family of interest:
# text, integer, float and datetime (every row carries 2022-01-01).
df = pd.DataFrame(dict(
    string_column=["these", "are", "strings"],
    int_column=[1, 2, 3],
    float_column=[0.5, 0.3, 0.2],
    date_column=[datetime(2022, 1, 1) for _ in range(3)],
))

df

Unnamed: 0,string_column,int_column,float_column,date_column
0,these,1,0.5,2022-01-01
1,are,2,0.3,2022-01-01
2,strings,3,0.2,2022-01-01


In [2]:
# Compare each column's dtype with the Python/NumPy type one might expect
# and print the outcome of every comparison, one per line.
for column, expected_type in (
    ('string_column', str),
    ('int_column', np.int64),
    ('float_column', np.float64),
):
    print(df[column].dtype == expected_type)

False
True
True


In [13]:
from typing import Any, Dict
import pandas as pd
import numpy as np

# Demo frame for the schema check: text, integer and float columns only.
df = pd.DataFrame(dict(
    string_column=["these", "are", "strings"],
    int_column=[1, 2, 3],
    float_column=[0.5, 0.3, 0.2],
))

# Column name -> type that the column's dtype is compared against.
target_schema = dict(
    string_column=str,
    int_column=np.int64,
    float_column=np.float64,
)

def has_correct_schema(df: pd.DataFrame, target_schema: Dict[str, Any]) -> bool:
    """Check that every column named in ``target_schema`` has the expected dtype.

    Each column's ``dtype`` is compared to its target with ``==``. Every
    mismatch is reported with ``print`` so that all offending columns are
    listed in one pass rather than stopping at the first one.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose column dtypes are inspected.
    target_schema : Dict[str, Any]
        Mapping of column name to the type/dtype the column is compared with.

    Returns
    -------
    bool
        ``True`` if every listed column exists in ``df`` and its dtype equals
        the target (also ``True`` for an empty schema), otherwise ``False``.
    """
    schema_is_valid = True
    for colname, target_dtype in target_schema.items():
        # A column that is absent is a schema violation, not a crash:
        # report it and keep checking the remaining columns.
        if colname not in df.columns:
            print(f"'{colname}' is missing from the DataFrame")
            schema_is_valid = False
            continue
        actual_dtype = df[colname].dtype
        if not (actual_dtype == target_dtype):
            print(f"'{colname}' is of type {actual_dtype} but should be {target_dtype}")
            schema_is_valid = False
    return schema_is_valid

has_correct_schema(df, target_schema)

# Prints:
#   'string_column' is of type object but should be <class 'str'>
# and evaluates to:
#   False

'string_column' is of type object but should be <class 'str'>


False

In [15]:
# Turn the per-column dtypes into a two-column frame:
# 'index' holds the column name, 'dtype' holds its dtype.
df_dtypes = df.dtypes.to_frame("dtype").reset_index()
df_dtypes

Unnamed: 0,index,dtype
0,string_column,object
1,int_column,int64
2,float_column,float64
3,date_column,datetime64[ns]


In [21]:
# Select the dtype recorded for 'string_column' as a NumPy array.
df_dtypes.loc[df_dtypes['index'] == 'string_column', 'dtype'].values

array([dtype('O')], dtype=object)