<a href="https://colab.research.google.com/github/stphnmade/extern-docs/blob/main/Data_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Pandas for data cleaning operations

In [None]:
import pandas as pd
import numpy as np

# Task
Convert the `/content/LLCP2024.XPT` file to a CSV file named `LLCP2024.csv`, load the CSV into a pandas DataFrame, and then display its head and info.

### Correcting file path and attempting conversion

In [9]:
import os

# Corrected file path with trailing whitespace
file_path = '/content/LLCP2024.XPT'
csv_path = '/content/LLCP2024.csv'

if os.path.exists(file_path):
    print(f"File '{file_path}' exists. Attempting to read with pandas.read_sas...")
    try:
        df = pd.read_sas(file_path, format='xport')
        df.to_csv(csv_path, index=False)
        print(f"XPT file converted to CSV successfully and saved as {csv_path}")
        print("\nDisplaying the head of the DataFrame:")
        display(df.head())
        print("\nDisplaying info of the DataFrame:")
        df.info()
    except Exception as e:
        print(f"An error occurred during pandas.read_sas or df.to_csv: {e}")
else:
    print(f"Error: The file '{file_path}' was not found by the Python interpreter. Please ensure the file is correctly placed.")

File '/content/LLCP2024.XPT' exists. Attempting to read with pandas.read_sas...
XPT file converted to CSV successfully and saved as /content/LLCP2024.csv

Displaying the head of the DataFrame:


Unnamed: 0,_STATE,FMONTH,IDATE,IMONTH,IDAY,IYEAR,DISPCODE,SEQNO,_PSU,CTELENM1,...,_LCSCTSN,_LCSPSTF,DRNKANY6,DROCDY4_,_RFBING6,_DRNKWK3,_RFDRHV9,_FLSHOT7,_PNEUMO3,_AIDTST4
0,1.0,2.0,b'02282024',b'02',b'28',b'2024',1100.0,b'2024000001',2024000000.0,1.0,...,,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,1.0,2.0,2.0
1,1.0,2.0,b'02212024',b'02',b'21',b'2024',1100.0,b'2024000002',2024000000.0,1.0,...,4.0,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,1.0,1.0,2.0
2,1.0,2.0,b'02212024',b'02',b'21',b'2024',1100.0,b'2024000003',2024000000.0,1.0,...,4.0,2.0,1.0,100.0,2.0,1400.0,1.0,,,2.0
3,1.0,2.0,b'02282024',b'02',b'28',b'2024',1100.0,b'2024000004',2024000000.0,1.0,...,,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,1.0,1.0,2.0
4,1.0,2.0,b'02212024',b'02',b'21',b'2024',1100.0,b'2024000005',2024000000.0,1.0,...,3.0,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,,,2.0



Displaying info of the DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 457670 entries, 0 to 457669
Columns: 301 entries, _STATE to _AIDTST4
dtypes: float64(296), object(5)
memory usage: 1.0+ GB


### Cleaning date-related columns

In [8]:
import pandas as pd

# Load the CSV file into a DataFrame, assuming it was successfully saved in the previous step
# If df is already in memory, this step can be skipped, but explicitly loading ensures we work with the CSV.
csv_path = '/content/LLCP2024.csv'
df = pd.read_csv(csv_path)

print("Original DataFrame head before cleaning:")
display(df[['FMONTH', 'IDATE', 'IMONTH', 'IDAY']].head())
print("Original DataFrame info before cleaning:")
df[['FMONTH', 'IDATE', 'IMONTH', 'IDAY']].info()

# Columns to clean from b' prefix
columns_to_clean = ['IDATE', 'IMONTH', 'IDAY']

for col in columns_to_clean:
    if col in df.columns and df[col].dtype == 'object':
        # Remove b' and ' from the string representation
        df[col] = df[col].astype(str).str.replace("b'", "").str.replace("'", "")

# Convert IMONTH and IDAY to integer
for col in ['IMONTH', 'IDAY']:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64') # Use 'Int64' to allow for NaN

# Convert FMONTH to integer as well, if it's float
if 'FMONTH' in df.columns and pd.api.types.is_float_dtype(df['FMONTH']):
    df['FMONTH'] = pd.to_numeric(df['FMONTH'], errors='coerce').astype('Int64')

# Try to convert IDATE to datetime, assuming 'MMDDYYYY' format
if 'IDATE' in df.columns:
    df['IDATE'] = pd.to_datetime(df['IDATE'], format='%m%d%Y', errors='coerce')

print("\nCleaned DataFrame head:")
display(df[['FMONTH', 'IDATE', 'IMONTH', 'IDAY']].head())

print("\nCleaned DataFrame info:")
df[['FMONTH', 'IDATE', 'IMONTH', 'IDAY']].info()

# Rename df state and month

df = df.rename(columns={'IMONTH': 'month', 'IDAY': 'day', '_STATE':'State'})


Original DataFrame head before cleaning:


Unnamed: 0,FMONTH,IDATE,IMONTH,IDAY
0,2.0,b'02282024',b'02',b'28'
1,2.0,b'02212024',b'02',b'21'
2,2.0,b'02212024',b'02',b'21'
3,2.0,b'02282024',b'02',b'28'
4,2.0,b'02212024',b'02',b'21'


Original DataFrame info before cleaning:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 457670 entries, 0 to 457669
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   FMONTH  457670 non-null  float64
 1   IDATE   457670 non-null  object 
 2   IMONTH  457670 non-null  object 
 3   IDAY    457670 non-null  object 
dtypes: float64(1), object(3)
memory usage: 14.0+ MB

Cleaned DataFrame head:


Unnamed: 0,FMONTH,IDATE,IMONTH,IDAY
0,2,2024-02-28,2,28
1,2,2024-02-21,2,21
2,2,2024-02-21,2,21
3,2,2024-02-28,2,28
4,2,2024-02-21,2,21



Cleaned DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 457670 entries, 0 to 457669
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   FMONTH  457670 non-null  Int64         
 1   IDATE   457669 non-null  datetime64[ns]
 2   IMONTH  457669 non-null  Int64         
 3   IDAY    457669 non-null  Int64         
dtypes: Int64(3), datetime64[ns](1)
memory usage: 15.3 MB


## Transforming the state and Month numbers to human readable state names.


## Transforming the state and Month numbers to human readable state names.

I will use standard names for months (1=January, 2=February, etc.) and provide example mappings for a few states. **You may need to update the `state_mapping` dictionary with the full and accurate information from the codebook.**

In [7]:
df["State"] = df["State"].map(state_map)
df["month"] = df["month"].map(month_map)

display(df[["State","month"]].head(6))

# rename and replace _STATE and FMONTH with state and month in the df wiht the maping we made
# Any states that have not been mapped should be filtered out from the data set
# and the new df hould be saved and named BRFSS_clean


df_clean = df[df["State"].isin(state_map)]
display(df_clean.head(6))

Unnamed: 0,State,month
0,,
1,,
2,,
3,,
4,,
5,,


Unnamed: 0,State,FMONTH,IDATE,month,day,...,_DRNKWK3,_RFDRHV9,_FLSHOT7,_PNEUMO3,_AIDTST4


In [27]:
import os
import pandas as pd

# --- Lookups (you already have these; keep them here for completeness) ---
state_map = {
    1: "Alabama",
    5: "Arkansas",
    12: "Florida",
    13: "Georgia",
    22: "Louisiana",
    28: "Mississippi",
    37: "North Carolina",
    45: "South Carolina",
    48: "Texas",
    51: "Virginia",
    54: "West Virginia",
    72: "Puerto Rico",
}

month_map = {
    1: "January", 2: "February", 3: "March", 4: "April",
    5: "May", 6: "June", 7: "July", 8: "August",
    9: "September", 10: "October", 11: "November", 12: "December"
}

def decode_bytes_to_str(series: pd.Series) -> pd.Series:
    """
    Decode a pandas Series that may contain bytes into clean strings.
    Leaves non-bytes alone.
    """
    if series.dtype != "object":
        return series
    return series.map(lambda x: x.decode("utf-8", "ignore") if isinstance(x, (bytes, bytearray)) else x)

def load_brfss_xpt(xpt_path: str) -> pd.DataFrame:
    if not os.path.exists(xpt_path):
        raise FileNotFoundError(f"Could not find XPT at: {xpt_path}")
    return pd.read_sas(xpt_path, format="xport")

def clean_brfss(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    # --- Decode byte columns safely (IDATE/IMONTH/IDAY often come in as bytes) ---
    for col in ["IDATE", "IMONTH", "IDAY"]:
        if col in df.columns:
            df[col] = decode_bytes_to_str(df[col])

    # --- Convert date parts to numeric (nullable Int64) ---
    for col in ["FMONTH", "IMONTH", "IDAY", "_STATE"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")

    # --- Parse IDATE (MMDDYYYY) into datetime ---
    if "IDATE" in df.columns:
        # ensure it is string without ".0" artifacts
        idate_str = df["IDATE"].astype("string").str.replace(r"\.0$", "", regex=True)
        df["interview_date"] = pd.to_datetime(idate_str, format="%m%d%Y", errors="coerce")

    # --- Map state + month labels into new columns (keep raw numeric columns too) ---
    if "_STATE" in df.columns:
        df["state_name"] = df["_STATE"].map(state_map)

    if "FMONTH" in df.columns:
        df["month"] = df["FMONTH"].map(month_map)

    # --- Filter: keep only rows with mapped states (i.e., your chosen subset of states) ---
    # IMPORTANT: filter based on numeric _STATE keys, not the dict object itself
    if "_STATE" in df.columns:
        df = df[df["_STATE"].isin(list(state_map.keys()))].copy()

    # Optional: reorder a few key columns near the front for convenience
    preferred_cols = ["_STATE", "state_name", "FMONTH", "file_month_name", "interview_date"]
    existing_preferred = [c for c in preferred_cols if c in df.columns]
    remaining = [c for c in df.columns if c not in existing_preferred]
    df = df[existing_preferred + remaining]

    return df

# --- Usage ---
xpt_path = "/content/LLCP2024.XPT"

df_raw = load_brfss_xpt(xpt_path)
BRFSS_clean = clean_brfss(df_raw)

display(BRFSS_clean.head(6))
BRFSS_clean.info()

Unnamed: 0,_STATE,state_name,FMONTH,interview_date,IDATE,IMONTH,IDAY,IYEAR,DISPCODE,SEQNO,...,_LCSPSTF,DRNKANY6,DROCDY4_,_RFBING6,_DRNKWK3,_RFDRHV9,_FLSHOT7,_PNEUMO3,_AIDTST4,month
0,1,Alabama,2,2024-02-28,2282024,2,28,b'2024',1100.0,b'2024000001',...,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,1.0,2.0,2.0,February
1,1,Alabama,2,2024-02-21,2212024,2,21,b'2024',1100.0,b'2024000002',...,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,1.0,1.0,2.0,February
2,1,Alabama,2,2024-02-21,2212024,2,21,b'2024',1100.0,b'2024000003',...,2.0,1.0,100.0,2.0,1400.0,1.0,,,2.0,February
3,1,Alabama,2,2024-02-28,2282024,2,28,b'2024',1100.0,b'2024000004',...,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,1.0,1.0,2.0,February
4,1,Alabama,2,2024-02-21,2212024,2,21,b'2024',1100.0,b'2024000005',...,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,,,2.0,February
5,1,Alabama,2,2024-02-21,2212024,2,21,b'2024',1100.0,b'2024000006',...,9.0,2.0,5.397605e-79,1.0,5.397605e-79,1.0,,,2.0,February


<class 'pandas.core.frame.DataFrame'>
Index: 81403 entries, 0 to 456321
Columns: 304 entries, _STATE to month
dtypes: Int64(4), datetime64[ns](1), float64(294), object(5)
memory usage: 189.7+ MB


In [28]:
BRFSS_clean.to_csv("/content/BRFSS_clean.csv", index=False)

In [29]:
analysis_columns = [
    # Geography
    "state_name","month",

    # Demographics
    "SEXVAR", "_AGE80", "INCOME3", "EDUCA", "_RACE",

    # Preventive care (hygiene proxies)
    "CHECKUP1", "LASTDEN4", "FLUSHOT7", "PRIMINS2", "MEDCOST1",

    # Disease burden
    "DIABETE4", "CVDCRHD4", "ASTHMA3", "CHCKDNY2", "GENHLTH", "PHYSHLTH",

    # Survey design
    "_LLCPWT", "_STSTR", "_PSU"
]

In [30]:
BRFSS_sub = BRFSS_clean[analysis_columns].copy()

display(BRFSS_sub.head(10))
BRFSS_sub.info()

Unnamed: 0,_STATE,state_name,month,SEXVAR,_AGE80,INCOME3,EDUCA,_RACE,CHECKUP1,LASTDEN4,...,MEDCOST1,DIABETE4,CVDCRHD4,ASTHMA3,CHCKDNY2,GENHLTH,PHYSHLTH,_LLCPWT,_STSTR,_PSU
0,1,Alabama,February,2.0,78.0,99.0,4.0,1.0,1.0,1.0,...,2.0,3.0,2.0,2.0,2.0,3.0,2.0,261.525511,11011.0,2024000000.0
1,1,Alabama,February,1.0,80.0,11.0,6.0,1.0,1.0,1.0,...,2.0,3.0,1.0,2.0,2.0,1.0,88.0,307.169688,11011.0,2024000000.0
2,1,Alabama,February,1.0,59.0,99.0,5.0,1.0,4.0,4.0,...,1.0,3.0,2.0,2.0,2.0,2.0,30.0,2939.862806,11011.0,2024000000.0
3,1,Alabama,February,1.0,80.0,6.0,6.0,1.0,1.0,1.0,...,2.0,3.0,2.0,2.0,2.0,1.0,88.0,153.584844,11011.0,2024000000.0
4,1,Alabama,February,1.0,47.0,3.0,5.0,1.0,1.0,1.0,...,2.0,3.0,2.0,2.0,2.0,3.0,88.0,1229.623036,11011.0,2024000000.0
5,1,Alabama,February,1.0,54.0,9.0,4.0,1.0,1.0,1.0,...,2.0,1.0,2.0,2.0,2.0,3.0,7.0,451.219871,11011.0,2024000000.0
6,1,Alabama,February,2.0,71.0,6.0,5.0,1.0,1.0,1.0,...,2.0,3.0,2.0,2.0,2.0,4.0,88.0,388.467984,11011.0,2024000000.0
7,1,Alabama,February,2.0,68.0,99.0,5.0,1.0,1.0,2.0,...,2.0,1.0,2.0,2.0,2.0,5.0,30.0,194.233992,11011.0,2024000000.0
8,1,Alabama,February,2.0,70.0,5.0,4.0,1.0,1.0,1.0,...,2.0,3.0,2.0,2.0,2.0,3.0,5.0,455.584194,11011.0,2024000000.0
9,1,Alabama,February,1.0,80.0,7.0,5.0,1.0,1.0,4.0,...,2.0,1.0,1.0,2.0,2.0,4.0,14.0,211.232063,11011.0,2024000000.0


<class 'pandas.core.frame.DataFrame'>
Index: 81403 entries, 0 to 456321
Data columns (total 22 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   _STATE      81403 non-null  Int64  
 1   state_name  81403 non-null  object 
 2   month       81403 non-null  object 
 3   SEXVAR      81403 non-null  float64
 4   _AGE80      81403 non-null  float64
 5   INCOME3     79726 non-null  float64
 6   EDUCA       81399 non-null  float64
 7   _RACE       81403 non-null  float64
 8   CHECKUP1    81401 non-null  float64
 9   LASTDEN4    81401 non-null  float64
 10  FLUSHOT7    73988 non-null  float64
 11  PRIMINS2    81401 non-null  float64
 12  MEDCOST1    81401 non-null  float64
 13  DIABETE4    81400 non-null  float64
 14  CVDCRHD4    81400 non-null  float64
 15  ASTHMA3     81401 non-null  float64
 16  CHCKDNY2    81400 non-null  float64
 17  GENHLTH     81400 non-null  float64
 18  PHYSHLTH    81400 non-null  float64
 19  _LLCPWT     81403 non-null  f

# Step 5: Helpful Descriptive Statistics

In [33]:
import numpy as np
import pandas as pd

df = BRFSS_sub.copy()  # or BRFSS_clean filtered to your states

# --- Helper: binary indicators for prevention ---
# Note: BRFSS codes vary by variable; these are common patterns.
# You may need to tweak "good" codes based on your codebook.
# Examples:
# CHECKUP1: 1=within past year (good)
# LASTDEN4: 1=within past year (good)
# FLUSHOT7: 1=yes (good)
# PRIMINS2: 1=yes (good)

df["checkup_ok"]  = np.where(df["CHECKUP1"] == 1, 1, np.where(df["CHECKUP1"].isna(), np.nan, 0))
df["dental_ok"]   = np.where(df["LASTDEN4"] == 1, 1, np.where(df["LASTDEN4"].isna(), np.nan, 0))
df["flushot_ok"]  = np.where(df["FLUSHOT7"] == 1, 1, np.where(df["FLUSHOT7"].isna(), np.nan, 0))
df["insured_ok"]  = np.where(df["PRIMINS2"] == 1, 1, np.where(df["PRIMINS2"].isna(), np.nan, 0))

# Preventive Care Index (0-4), require at least 2 components answered to compute
components = ["checkup_ok", "dental_ok", "flushot_ok", "insured_ok"]
df["pci_answered"] = df[components].notna().sum(axis=1)
df["PCI"] = np.where(df["pci_answered"] >= 2, df[components].sum(axis=1, skipna=True), np.nan)

# Chronic condition count (adjust list as desired)
conditions = ["DIABETE4", "CVDCRHD4", "ASTHMA3", "CHCKDNY2"]
# Common pattern: 1 = yes (has condition)
for c in conditions:
    df[c + "_yes"] = np.where(df[c] == 1, 1, np.where(df[c].isna(), np.nan, 0))

df["CCC_answered"] = df[[c + "_yes" for c in conditions]].notna().sum(axis=1)
df["CCC"] = np.where(df["CCC_answered"] >= 2, df[[c + "_yes" for c in conditions]].sum(axis=1, skipna=True), np.nan)

# Ensure PHYSHLTH numeric (0-30 typically)
df["PHYSHLTH"] = pd.to_numeric(df["PHYSHLTH"], errors="coerce")

# Weighting Descriptive Stats

In [34]:
def weighted_mean(x, w):
    m = (~np.isnan(x)) & (~np.isnan(w))
    return np.sum(x[m] * w[m]) / np.sum(w[m])

def weighted_max(x):
    return np.nanmax(x)

w = df["_LLCPWT"].to_numpy(dtype=float)

pci = df["PCI"].to_numpy(dtype=float)
ccc = df["CCC"].to_numpy(dtype=float)
poor_days = df["PHYSHLTH"].to_numpy(dtype=float)

print(f"Weighted mean PCI (0-4): {weighted_mean(pci, w):.3f}")
print(f"Max PCI: {weighted_max(pci)}")

print(f"Weighted mean CCC (0-4): {weighted_mean(ccc, w):.3f}")
print(f"Max CCC: {weighted_max(ccc)}")

print(f"Weighted mean Poor Physical Health Days: {weighted_mean(poor_days, w):.3f}")
print(f"Max Poor Physical Health Days: {weighted_max(poor_days)}")

Weighted mean PCI (0-4): 2.058
Max PCI: 4.0
Weighted mean CCC (0-4): 0.375
Max CCC: 4.0
Weighted mean Poor Physical Health Days: 58.948
Max Poor Physical Health Days: 99.0


In [42]:
# Group: sex x income
group_cols = ["SEXVAR", "INCOME3"]

grouped = df.dropna(subset=group_cols).copy()
summary = grouped.groupby(group_cols).apply(
    lambda g: pd.Series({
        "n": len(g),
        "w_mean_PCI": weighted_mean(g["PCI"].to_numpy(float), g["_LLCPWT"].to_numpy(float)),
        "w_mean_poor_days": weighted_mean(g["PHYSHLTH"].to_numpy(float), g["_LLCPWT"].to_numpy(float))
    })
).reset_index()

display(summary.sort_values(["SEXVAR","INCOME3"]))

  summary = grouped.groupby(group_cols).apply(


Unnamed: 0,SEXVAR,INCOME3,n,w_mean_PCI,w_mean_poor_days
0,1,1,776.0,1.276971,55.900713
1,1,2,878.0,1.498483,47.318374
2,1,3,1082.0,1.413789,52.837621
3,1,4,1517.0,1.467857,58.624654
4,1,5,3200.0,1.53771,59.052464
5,1,6,4064.0,1.778027,58.196243
6,1,7,4798.0,2.051438,61.655324
7,1,8,4039.0,2.182352,63.645421
8,1,9,4590.0,2.379028,62.490989
9,1,10,2245.0,2.537807,65.146037


# Step 6: Normalize Data (Min-Max Scaling)


In [44]:
def minmax(series: pd.Series) -> pd.Series:
    s = series.astype(float)
    mn, mx = np.nanmin(s), np.nanmax(s)
    if np.isclose(mx - mn, 0) or np.isnan(mn) or np.isnan(mx):
        return pd.Series(np.nan, index=series.index)
    return (s - mn) / (mx - mn)

df["PCI_norm"] = minmax(df["PCI"])
df["CCC_norm"] = minmax(df["CCC"])
df["PHYSHLTH_norm"] = minmax(df["PHYSHLTH"])

display()

# Step 7: Handle Inconsistent Formats (Type Consistency)

In [41]:
int_cols = ["_STATE","State", "SEXVAR", "_AGE80", "INCOME3", "EDUCA", "_RACE", "FMONTH",
            "_STSTR", "_PSU"]
for c in int_cols:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce").astype("Int64")

df["_LLCPWT"] = pd.to_numeric(df["_LLCPWT"], errors="coerce").astype(float)

# Step 8: Save Cleaned Data

In [48]:
keep_cols = [
    "state_name","SEXVAR","_AGE80","INCOME3","EDUCA","_RACE",
    "CHECKUP1","LASTDEN4","FLUSHOT7","PRIMINS2","MEDCOST1",
    "DIABETE4","CVDCRHD4","ASTHMA3","CHCKDNY2","GENHLTH","PHYSHLTH",
    "_LLCPWT","_STSTR","_PSU", "PCI","CCC","PCI_norm","CCC_norm","PHYSHLTH_norm"
]

keep_cols = [c for c in keep_cols if c in df.columns]
BRFSS_final = df[keep_cols].copy()

BRFSS_final.to_csv("BRFSS_final.csv", index=False)
display(BRFSS_final.head(10))
print("Saved BRFSS_final.csv")

Unnamed: 0,state_name,SEXVAR,_AGE80,INCOME3,EDUCA,_RACE,CHECKUP1,LASTDEN4,FLUSHOT7,PRIMINS2,...,GENHLTH,PHYSHLTH,_LLCPWT,_STSTR,_PSU,PCI,CCC,PCI_norm,CCC_norm,PHYSHLTH_norm
0,Alabama,2,78,99,4,1,1.0,1.0,1.0,3.0,...,3.0,2.0,261.525511,11011,2024000001,3.0,0.0,0.75,0.0,0.010204
1,Alabama,1,80,11,6,1,1.0,1.0,1.0,3.0,...,1.0,88.0,307.169688,11011,2024000002,3.0,1.0,0.75,0.25,0.887755
2,Alabama,1,59,99,5,1,4.0,4.0,2.0,1.0,...,2.0,30.0,2939.862806,11011,2024000003,1.0,0.0,0.25,0.0,0.295918
3,Alabama,1,80,6,6,1,1.0,1.0,1.0,3.0,...,1.0,88.0,153.584844,11011,2024000004,3.0,0.0,0.75,0.0,0.887755
4,Alabama,1,47,3,5,1,1.0,1.0,1.0,5.0,...,3.0,88.0,1229.623036,11011,2024000005,3.0,0.0,0.75,0.0,0.887755
5,Alabama,1,54,9,4,1,1.0,1.0,1.0,1.0,...,3.0,7.0,451.219871,11011,2024000006,4.0,1.0,1.0,0.25,0.061224
6,Alabama,2,71,6,5,1,1.0,1.0,2.0,3.0,...,4.0,88.0,388.467984,11011,2024000007,2.0,0.0,0.5,0.0,0.887755
7,Alabama,2,68,99,5,1,1.0,2.0,2.0,3.0,...,5.0,30.0,194.233992,11011,2024000008,1.0,1.0,0.25,0.25,0.295918
8,Alabama,2,70,5,4,1,1.0,1.0,1.0,3.0,...,3.0,5.0,455.584194,11011,2024000009,3.0,0.0,0.75,0.0,0.040816
9,Alabama,1,80,7,5,1,1.0,4.0,1.0,3.0,...,4.0,14.0,211.232063,11011,2024000010,2.0,2.0,0.5,0.5,0.132653


Saved BRFSS_final.csv
