# Create New Bone Tissue Variables

The following code creates 4 new cortical bone variables from the pixel counts of the cortical bone tissue mapped image. 

The first code chunk will create a new variable by adding all the tissue types to create pixel cortical area (C.Ar).

The second code chunk will convert the pixel values into percentages (e.g., tissue type/C.Ar) and into new variables.

The third code chunk will create two new variables calculating percent periosteal area (e.g., periosteal tissue type/periosteal area) and percent endosteal area (e.g., endosteal tissue type/endosteal area).  

**All codes assume long format data frames.**

*Note: These codes are set up for one map's values.*

### Modules
Modules to import/load

In [1]:
import numpy as np
import pandas as pd

### Load in dataset

In [5]:
# Read the long-format pixel counts (ID / Tissue Type / Pixel Count) from disk
df = pd.read_csv(filepath_or_buffer='Long_format_tissue_map_pixels.csv')

# Preview the first five rows to confirm the file loaded as expected
df.head(n=5)

Unnamed: 0,ID,Tissue Type,Pixel Count
0,GP.004,WOV,631440
1,GP.004,FLC,9856109
2,GP.004,FLC-LZPO,7635902
3,GP.004,PF,23627650
4,GP.004,PF-LAM,3850163


### Pixel number of cortical area (C.Ar)

In [13]:
# Calculate the pixel count for C.Ar: the sum of every tissue type except BK
# (BK is the background, so it is not part of the cortical area)
c_ar_pixel_count = df.loc[df['Tissue Type'] != 'BK', 'Pixel Count'].sum()

# Append C.Ar as a new row. DataFrame.append was removed in pandas 2.0, so the
# row is built as a one-row frame and joined with pd.concat; columns the new
# row does not define (e.g. ID) are filled with NaN.
c_ar_row = pd.DataFrame({'Tissue Type': ['C.Ar'], 'Pixel Count': [c_ar_pixel_count]})
df_pixel_counts = pd.concat([df, c_ar_row], ignore_index=True)
df_pixel_counts


  df_pixel_counts = df_pixel_counts.append({'Tissue Type': 'C.Ar', 'Pixel Count': c_ar_pixel_count}, ignore_index=True)


Unnamed: 0,ID,Tissue Type,Pixel Count
0,GP.004,WOV,631440
1,GP.004,FLC,9856109
2,GP.004,FLC-LZPO,7635902
3,GP.004,PF,23627650
4,GP.004,PF-LAM,3850163
...,...,...,...
1075,101,EPF-LAM,3508
1076,101,SF,35902
1077,101,INT,0
1078,101,HAV,101630


### Reorder C.Ar above BK
Optional, but I prefer usable variables together over a useless variable like BK (i.e., background) 

In [None]:
# Reorder the dataframe to move C.Ar directly above BK.
# The rows are located by their 'Tissue Type' value rather than by a fixed
# row number, so the cell keeps working when the number of tissue types changes.
tissue_types = df_pixel_counts['Tissue Type']
c_ar_labels = df_pixel_counts.index[tissue_types == 'C.Ar'].tolist()
bk_labels = df_pixel_counts.index[tissue_types == 'BK'].tolist()

# Start from the current order with the C.Ar row(s) taken out ...
reordered_index = [label for label in df_pixel_counts.index if label not in c_ar_labels]
if bk_labels:
    # ... and put them back immediately before the first BK row
    bk_position = reordered_index.index(bk_labels[0])
    reordered_index[bk_position:bk_position] = c_ar_labels
else:
    # No background row in this map: C.Ar simply stays at the end
    reordered_index.extend(c_ar_labels)

df_pixel_counts_reordered = df_pixel_counts.reindex(reordered_index).reset_index(drop=True)
df_pixel_counts_reordered


### Convert pixel counts into percentages 

In [None]:
# Look up the C.Ar pixel count (first matching row) to use as the denominator
c_ar_value = df_pixel_counts_reordered.loc[
    df_pixel_counts_reordered['Tissue Type'].eq('C.Ar'), 'Pixel Count'
].iloc[0]

# Express every row as a percent of C.Ar; BK (background) is left as NaN
df_pixel_counts_reordered['Percent of C.Ar'] = (
    df_pixel_counts_reordered['Pixel Count']
    .div(c_ar_value)
    .mul(100)
    .where(df_pixel_counts_reordered['Tissue Type'].ne('BK'))
)

df_pixel_counts_reordered


### Calculate percent periosteal tissue types by area

In [None]:
# Tissue types (WOV through LAM) that are summed to give the periosteal area
periosteal_types = ['WOV', 'FLC', 'FLC-LZPO', 'PF', 'PF-LAM', 'LAM']
is_periosteal = df_pixel_counts_reordered['Tissue Type'].isin(periosteal_types)

# Total pixel count across those tissue types
sum_tissues = df_pixel_counts_reordered.loc[is_periosteal, 'Pixel Count'].sum()

# Percent of that total for each periosteal row; every other row is NaN
df_pixel_counts_reordered['Percent of P.Ar'] = (
    df_pixel_counts_reordered['Pixel Count'].div(sum_tissues).mul(100).where(is_periosteal)
)

df_pixel_counts_reordered


### Calculate percent endosteal tissue types by area

In [None]:
# Tissue types (ELAM through EPF-LAM) that are summed to give the endosteal area
endosteal_types = ['ELAM', 'ECCC', 'EPF', 'EPF-LAM']
is_endosteal = df_pixel_counts_reordered['Tissue Type'].isin(endosteal_types)

# Total pixel count across those tissue types
sum_tissues_e = df_pixel_counts_reordered.loc[is_endosteal, 'Pixel Count'].sum()

# Percent of that total for each endosteal row; every other row is NaN
df_pixel_counts_reordered['Percent of E.Ar'] = (
    df_pixel_counts_reordered['Pixel Count'].div(sum_tissues_e).mul(100).where(is_endosteal)
)

df_pixel_counts_reordered


### Export .csv file

In [None]:
# Write the single-map results to disk without the row index
df_pixel_counts_reordered.to_csv(path_or_buf='df_map_values.csv', index=False)

## Code to calculate C.Ar and percentages in a data frame

In [9]:
def calculate_percentages_updated_v5(df):
    """Add a C.Ar row and three percent columns to one ID's tissue rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows with the columns 'ID', 'Tissue Type' and
        'Pixel Count'. The ID of the first row is used for the new C.Ar row,
        so the frame is expected to hold a single ID.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with a fresh 0..n-1 index
        containing the input rows plus one 'C.Ar' row, and the columns
        'Percent of C.Ar', 'Percent of P.Ar' and 'Percent of E.Ar'. Rows a
        percentage does not apply to hold NaN in that column.

    Raises
    ------
    IndexError
        If ``df`` has no rows (there is no ID to read).
    """
    periosteal_types = ['WOV', 'FLC', 'FLC-LZPO', 'PF', 'PF-LAM', 'LAM']
    endosteal_types = ['ELAM', 'ECCC', 'EPF', 'EPF-LAM']

    # Get the ID value for the group
    current_id = df['ID'].iloc[0]

    # C.Ar is the total of every row that is not background (BK). Summing the
    # non-BK rows directly gives the right answer whether BK is absent,
    # present once, or present on several rows.
    is_background = df['Tissue Type'] == 'BK'
    c_ar_value = df.loc[~is_background, 'Pixel Count'].sum()

    # Add the C.Ar row to the dataframe using pandas.concat
    new_row = pd.DataFrame({'ID': [current_id], 'Tissue Type': ['C.Ar'], 'Pixel Count': [c_ar_value]})
    df = pd.concat([df, new_row], ignore_index=True)

    tissue_type = df['Tissue Type']
    pixel_count = df['Pixel Count']

    # Percent of each tissue type based on C.Ar (BK is left as NaN)
    df['Percent of C.Ar'] = np.where(tissue_type != 'BK',
                                     (pixel_count / c_ar_value) * 100,
                                     np.nan)

    # Percent of each WOV-to-LAM tissue type based on their combined total
    is_periosteal = tissue_type.isin(periosteal_types)
    sum_tissues = pixel_count[is_periosteal].sum()
    df['Percent of P.Ar'] = np.where(is_periosteal,
                                     (pixel_count / sum_tissues) * 100,
                                     np.nan)

    # Percent of each ELAM-to-EPF-LAM tissue type based on their combined total
    is_endosteal = tissue_type.isin(endosteal_types)
    sum_tissues_e = pixel_count[is_endosteal].sum()
    df['Percent of E.Ar'] = np.where(is_endosteal,
                                     (pixel_count / sum_tissues_e) * 100,
                                     np.nan)

    return df


### Code to group ID

In [10]:
def calculate_percentages_per_id(df):
    """Run ``calculate_percentages_updated_v5`` on each ID and stack the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows with an 'ID' column plus the columns that
        ``calculate_percentages_updated_v5`` reads.

    Returns
    -------
    pandas.DataFrame
        The per-ID results concatenated in sorted ID order with a fresh
        0..n-1 index. If ``df`` has no groups, an empty copy of ``df`` is
        returned unchanged (no percent columns are added).
    """
    # Iterate over the groups explicitly instead of using groupby.apply:
    # each group keeps its 'ID' column, which the per-ID function needs, and
    # this does not rely on apply passing the grouping column through.
    per_id_frames = [
        calculate_percentages_updated_v5(group)
        for _, group in df.groupby('ID')
    ]

    # pd.concat raises on an empty list, so handle "nothing to do" explicitly
    if not per_id_frames:
        return df.iloc[0:0].reset_index(drop=True)

    return pd.concat(per_id_frames, ignore_index=True)


### Convert and calculate pixels

In [11]:
# Test the updated function
result_df_per_id = calculate_percentages_per_id(df)
result_df_per_id.head(15)

Unnamed: 0,ID,Tissue Type,Pixel Count,Percent of C.Ar,Percent of P.Ar,Percent of E.Ar
0,101,WOV,0,0.0,0.0,
1,101,FLC,0,0.0,0.0,
2,101,FLC-LZPO,3512,0.286754,0.405799,
3,101,PF,0,0.0,0.0,
4,101,PF-LAM,7020,0.573181,0.811135,
5,101,LAM,854922,69.804139,98.783066,
6,101,ELAM,218250,17.820051,,98.418095
7,101,ECCC,0,0.0,,0.0
8,101,EPF,0,0.0,,0.0
9,101,EPF-LAM,3508,0.286427,,1.581905


### Export data frame

In [13]:
# Write the per-ID batch results to disk without the row index
result_df_per_id.to_csv(path_or_buf='df_batch_values.csv', index=False)

## Pivot data from long format to wide format

In [15]:
# Read the per-ID batch results back in from disk
df_batch_values = pd.read_csv(filepath_or_buffer='df_batch_values.csv')

# Long -> wide: one row per ID, and one column for every
# (measurement, tissue type) pair
df_wide = df_batch_values.pivot(columns='Tissue Type', index='ID')

# Collapse the two-level column labels into single "measurement_tissue" names
df_wide.columns = [
    '_'.join([measurement, tissue]).strip()
    for measurement, tissue in df_wide.columns
]

# Turn ID back into an ordinary column and preview the result
df_wide = df_wide.reset_index()
df_wide.head(n=10)


Unnamed: 0,ID,Pixel Count_C.Ar,Pixel Count_ECCC,Pixel Count_ELAM,Pixel Count_EPF,Pixel Count_EPF-LAM,Pixel Count_FLC,Pixel Count_FLC-LZPO,Pixel Count_HAV,Pixel Count_INT,...,Percent of E.Ar_EPF-LAM,Percent of E.Ar_FLC,Percent of E.Ar_FLC-LZPO,Percent of E.Ar_HAV,Percent of E.Ar_INT,Percent of E.Ar_LAM,Percent of E.Ar_PF,Percent of E.Ar_PF-LAM,Percent of E.Ar_SF,Percent of E.Ar_WOV
0,101,1224744,0,218250,0,3508,0,3512,101630,0,...,1.581905,,,,,,,,,
1,11,641657,0,659,0,2447,0,5546,38259,0,...,78.783001,,,,,,,,,
2,13,1301092,45373,18281,0,14364,0,0,167289,0,...,18.411136,,,,,,,,,
3,14,985308,52255,376,0,10107,0,3721,73937,0,...,16.109854,,,,,,,,,
4,15,1158326,14168,29392,0,2592,0,1846,47126,0,...,5.616225,,,,,,,,,
5,16,1121248,6598,1384,0,7387,0,0,63140,0,...,48.064285,,,,,,,,,
6,18,1804662,122073,124163,0,86510,0,0,404409,0,...,25.99881,,,,,,,,,
7,21,647860,0,17696,962,21818,0,69567,81756,0,...,53.903548,,,,,,,,,
8,25,1020257,14852,10188,3671,20274,0,907,83675,0,...,41.38818,,,,,,,,,
9,27,508195,0,1944,0,9314,3397,158362,12447,0,...,82.732279,,,,,,,,,


In [None]:
### Export data frame