# Tabular Data

## Classifier example based on https://docs.fast.ai/tutorial.tabular.html

### Using the tutorial's approach and making the output more human-friendly

In [None]:
from fastai.tabular.all import *

In [None]:
# Fetch the ADULT_SAMPLE dataset through fastai's `untar_data`; `path` is the
# directory it returns (the CSV is read from inside it in a later cell).
path = untar_data(URLs.ADULT_SAMPLE)
# Last expression of the cell: display the contents of that directory.
path.ls()

In [None]:
# Load the raw CSV into a pandas DataFrame; it is used later for splitting
# and for picking a sample row to predict on.
df = pd.read_csv(path/'adult.csv')
# Preview the first rows to check the available columns.
df.head()

In [None]:
# Column roles and preprocessing steps, named up front so the loader call
# below stays short.
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
continuous_cols = ['age', 'fnlwgt', 'education-num']
preprocessing = [Categorify, FillMissing, Normalize]

# Build the DataLoaders straight from the CSV file, with `salary` as target.
# NOTE: a later cell rebinds `dls` from a TabularPandas object, so this
# instance is superseded before any training happens.
dls = TabularDataLoaders.from_csv(
    path/'adult.csv',
    path=path,
    y_names="salary",
    cat_names=categorical_cols,
    cont_names=continuous_cols,
    procs=preprocessing,
)

In [None]:
# Hold out 20% of the rows for validation. The fixed seed makes the split
# itself reproducible across "Restart & Run All"; training remains stochastic
# (weight initialisation, batch shuffling), so metrics can still vary a little.
splits = RandomSplitter(valid_pct=0.2, seed=42)(range_of(df))

In [None]:
# Column roles and preprocessing steps for the TabularPandas wrapper.
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
continuous_cols = ['age', 'fnlwgt', 'education-num']
preprocessing = [Categorify, FillMissing, Normalize]

# Wrap the DataFrame together with its preprocessing and the train/valid
# split computed in the previous cell; `salary` is the prediction target.
to = TabularPandas(
    df,
    procs=preprocessing,
    cat_names=categorical_cols,
    cont_names=continuous_cols,
    y_names='salary',
    splits=splits,
)

In [None]:
# Create the DataLoaders from the TabularPandas object with a batch size of 64.
# This rebinds `dls`, replacing the instance built with
# `TabularDataLoaders.from_csv` in the earlier cell.
dls = to.dataloaders(bs=64)

In [None]:
# Display a sample batch as a quick sanity check of the processed data.
dls.show_batch()

In [None]:
# Create the tabular learner on top of `dls`, tracking accuracy as the metric.
learn = tabular_learner(dls, metrics=accuracy)

In [None]:
# Train for a single epoch; enough for a demo, not a tuned model.
learn.fit_one_cycle(1)

To test the model, use either the cell below (which predicts on the first row of the dataset) or the one after it, where you can enter the input values manually.

In [None]:
# Smoke test: predict on the first row of the dataset.
# `clas` is used later as the predicted class index and `probs` as the
# per-class probabilities; `row` is the first value `predict` returns.
row, clas, probs = learn.predict(df.iloc[0])

Manual entry of input values.
Reference: https://docs.fast.ai/tabular.data.html#tabulardataloaders.test_dl

In [None]:
# Hand-written example record. The keys are the feature columns the model was
# trained on (the cat_names and cont_names used above); each value is wrapped
# in a one-element list so the dict converts into a single-row DataFrame.
manual_input = {
    'age': [49],
    'workclass': ['Private'],
    'fnlwgt': [101320],
    'education': ['Assoc-acdm'],
    'education-num': [12.0],
    'marital-status': ['Married-civ-spouse'],
    # NOTE(review): occupation is left empty; presumably it is handled as an
    # unknown/missing category by the preprocessing. Confirm this is intended.
    'occupation': [''],
    'relationship': ['Wife'],
    'race': ['White'],
}
# Deliberately not named `input`: that would shadow Python's built-in
# `input()` for the remainder of the kernel session.
manual_df = pd.DataFrame(manual_input)

# `learn.predict` takes a single row, so pass the first (and only) one.
row, clas, probs = learn.predict(manual_df.iloc[0])

#### Making the prediction more human-readable

In [None]:
# Show the class labels; the predicted class index is looked up in this
# vocabulary in the next cell.
learn.dls.vocab

In [None]:
# Turn the raw prediction into a readable report: the winning label, its
# probability, and the probability assigned to every class.
labels = learn.dls.vocab
predicted_class_idx = clas.item()                       # tensor -> plain Python int
predicted_label = labels[predicted_class_idx]           # index -> class name
confidence = 100 * probs[predicted_class_idx].item()    # fraction -> percentage

# Assemble all lines first, then emit them with a single print.
report = [
    f"Predicted income bracket: {predicted_label}",
    f"Confidence: {confidence:.2f}%",
    "\nFull probability breakdown:",
]
report.extend(f"- {label}: {prob.item()*100:.2f}%" for label, prob in zip(labels, probs))
print("\n".join(report))

### Using the direct approach, without `TabularPandas`

In [None]:
from fastai.tabular.all import *

In [None]:
# Fetch the ADULT_SAMPLE dataset through fastai's `untar_data`; `path` is the
# directory it returns (the CSV is read from inside it in the next cells).
path = untar_data(URLs.ADULT_SAMPLE)
# Last expression of the cell: display the contents of that directory.
path.ls()

In [None]:
# Load the raw CSV into a pandas DataFrame for inspection.
df = pd.read_csv(path/'adult.csv')
# Preview the first rows to check the available columns.
df.head()

In [None]:
# Column roles and preprocessing steps, named up front so the loader call
# below stays short.
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation',
                    'relationship', 'race']
continuous_cols = ['age', 'fnlwgt', 'education-num']
preprocessing = [Categorify, FillMissing, Normalize]

# Build the DataLoaders straight from the CSV file, with `salary` as target.
dls = TabularDataLoaders.from_csv(
    path/'adult.csv',
    path=path,
    y_names="salary",
    cat_names=categorical_cols,
    cont_names=continuous_cols,
    procs=preprocessing,
)

# Create the tabular learner on these loaders, tracking accuracy.
learn = tabular_learner(dls, metrics=accuracy)

In [None]:
# Train for a single epoch; enough for a demo, not a tuned model.
learn.fit_one_cycle(1)

In [None]:
# Hand-written example record. The keys are the feature columns the model was
# trained on (the cat_names and cont_names used above); each value is wrapped
# in a one-element list so the dict converts into a single-row DataFrame.
manual_input = {
    'age': [49],
    'workclass': ['Private'],
    'fnlwgt': [101320],
    'education': ['Assoc-acdm'],
    'education-num': [12.0],
    'marital-status': ['Married-civ-spouse'],
    # NOTE(review): occupation is left empty; presumably it is handled as an
    # unknown/missing category by the preprocessing. Confirm this is intended.
    'occupation': [''],
    'relationship': ['Wife'],
    'race': ['White'],
}
# Deliberately not named `input`: that would shadow Python's built-in
# `input()` for the remainder of the kernel session.
manual_df = pd.DataFrame(manual_input)

# `learn.predict` takes a single row, so pass the first (and only) one.
row, clas, probs = learn.predict(manual_df.iloc[0])

In [11]:
# Show the class labels; the predicted class index is looked up in this
# vocabulary in the next cell.
learn.dls.vocab


['<50k', '>=50k']

In [None]:
# Turn the raw prediction into a readable report: the winning label, its
# probability, and the probability assigned to every class.
labels = learn.dls.vocab
predicted_class_idx = clas.item()                       # tensor -> plain Python int
predicted_label = labels[predicted_class_idx]           # index -> class name
confidence = 100 * probs[predicted_class_idx].item()    # fraction -> percentage

# Assemble all lines first, then emit them with a single print.
report = [
    f"Predicted income bracket: {predicted_label}",
    f"Confidence: {confidence:.2f}%",
    "\nFull probability breakdown:",
]
report.extend(f"- {label}: {prob.item()*100:.2f}%" for label, prob in zip(labels, probs))
print("\n".join(report))

Predicted income bracket: >=50k
Confidence: 64.22%

Full probability breakdown:
- <50k: 35.78%
- >=50k: 64.22%
