# Data prep


In [134]:
import pandas as pd
import plotly.express as px

In [135]:
# Load the raw records from the 'AWI_data_RAW' sheet of the assessment workbook
df = pd.read_excel(
    'AWI_data_analysis_assessment.xlsm',
    sheet_name='AWI_data_RAW',
)

In [136]:
# Defining functions

def check_if_present(FIELD):
    """Flag whether a source value marks woodland as present.

    Returns 1 when FIELD equals 1, 2 or 6, and 0 for any other value.
    """
    return 1 if FIELD in (1, 2, 6) else 0
    
def check_if_zero(FIELD):
    """Flag whether a source value is an explicit zero.

    Returns 1 when FIELD equals 0, and 0 for any other value.
    """
    is_zero = FIELD == 0
    return 1 if is_zero else 0
    
def check_if_name(FIELD):
    """Flag whether a name cell actually contains a name.

    Parameters
    ----------
    FIELD : any scalar
        The cell value. Strings are checked after stripping whitespace;
        missing values (None, NaN, pd.NA) count as "no name"; any other
        non-string value is converted to text before the check.

    Returns
    -------
    int
        0 if the cell is missing or blank, otherwise 1.
    """
    if not isinstance(FIELD, str):
        # Empty spreadsheet cells arrive as NaN/None, which have no .strip();
        # treat them as "no name" instead of raising AttributeError.
        if pd.isna(FIELD):
            return 0
        FIELD = str(FIELD)
    return 0 if FIELD.strip() == '' else 1


In [137]:
# Source columns to check for whether the parcel has been marked as woodland
columns_to_assess = [
    'TITHE', 'C19_EM', 'OSD', 'CM_1773', 'C18_EM',
    'C17_EM', 'EM', 'OTHER', 'HE_REF',
]

# For every source column, add a 1/0 flag for woodland present in that source
# and a 1/0 flag for an explicit zero (not woodland)
for source in columns_to_assess:
    source_values = df[source]
    df[f"present_{source}"] = source_values.apply(check_if_present)
    df[f"zero_{source}"] = source_values.apply(check_if_zero)

In [138]:
# Source groupings used to count how many sources show woodland present
present_groups = {
    'combined': ['TITHE', 'C19_EM', 'OSD', 'CM_1773', 'C18_EM', 'C17_EM', 'EM', 'OTHER', 'HE_REF'],
    'primary': ['TITHE', 'OSD', 'C17_EM', 'HE_REF'],
    'secondary': ['C19_EM', 'CM_1773', 'C18_EM', 'EM', 'OTHER'],
}

# Source groupings used to count how many sources show no woodland present
# NOTE(review): C17_EM and HE_REF are grouped as primary for the present counts
# but as secondary for the zero counts - confirm this asymmetry is intentional.
zero_groups = {
    'combined': ['TITHE', 'C19_EM', 'OSD', 'CM_1773', 'C18_EM', 'C17_EM', 'EM', 'OTHER', 'HE_REF'],
    'primary': ['TITHE', 'OSD'],
    'secondary': ['C19_EM', 'CM_1773', 'C18_EM', 'C17_EM', 'EM', 'OTHER', 'HE_REF'],
}

for flag, groups in (('present', present_groups), ('zero', zero_groups)):
    for group, sources in groups.items():
        # Built-in sum adds the 1/0 flag columns row by row, exactly like a chained "+"
        df[f"{flag}_{group}"] = sum(df[f"{flag}_{source}"] for source in sources)


In [140]:
# Check which parcels are named

def _name_as_text(value):
    """Return a name cell as text, mapping missing cells (None/NaN) to ''."""
    return '' if pd.isna(value) else str(value)

# Normalise NAME_TITHE to text. Missing cells become '' rather than going through
# a bare astype(str), which would turn NaN into the literal string 'nan' and make
# every unnamed parcel look named.
df['NAME_TITHE'] = df['NAME_TITHE'].map(_name_as_text)

names_to_assess = ['NAME_TITHE', 'NAME_EP1', 'NAME_OSD', 'NAME_EM']

for name_col in names_to_assess:
    # Normalise on the fly so blank (NaN) or non-text cells in any name column
    # cannot break the .strip() call inside check_if_name.
    df[f"present_{name_col}"] = df[name_col].map(_name_as_text).apply(check_if_name)

# Combine to get total named sources for parcel
df['name_present'] = (
    df['present_NAME_EM']
    + df['present_NAME_OSD']
    + df['present_NAME_EP1']
    + df['present_NAME_TITHE']
)

# Decision tree


In [142]:

# Parcels where at least one primary source shows woodland present
df_primary = df.loc[df['present_primary'] >= 1]
primary_zero_count = df_primary['zero_combined']

## A - Ancient woodland
df_primary_no_zero = df_primary.loc[primary_zero_count == 0]
# Exactly one source shows no woodland; split further by name evidence below
df_primary_one_zero = df_primary.loc[primary_zero_count == 1]
## A - Not ancient woodland
df_primary_plural_zero = df_primary.loc[primary_zero_count > 1]

primary_one_zero_names = df_primary_one_zero['name_present']
## B - Ancient woodland
df_primary_one_zero_named = df_primary_one_zero.loc[primary_one_zero_names >= 1]
## A- Maybe ancient woodland
df_primary_one_zero_not_named = df_primary_one_zero.loc[primary_one_zero_names == 0]



In [143]:
# Parcels with no primary evidence, split by whether any secondary source shows woodland
df_secondary = df[(df.present_primary == 0) & (df.present_secondary >= 1)]
df_no_evidence = df[(df.present_primary == 0) & (df.present_secondary == 0)]

## D - Maybe ancient woodland
# Contains lots of 3/4's (no map coverage)
df_no_evidence_no_zero = df_no_evidence[df_no_evidence.zero_combined == 0]

## C - Not ancient woodland
# Use >= 1 rather than == 1: parcels with two or more sources showing no woodland
# would otherwise fall into neither no-evidence bucket and drop out of the tree.
df_no_evidence_zero = df_no_evidence[df_no_evidence.zero_combined >= 1]

In [144]:
secondary_zero_count = df_secondary['zero_combined']

# Secondary-only parcels where no source shows an explicit zero; split by name evidence
df_secondary_no_zero = df_secondary.loc[secondary_zero_count == 0]
secondary_no_zero_names = df_secondary_no_zero['name_present']
## C - Ancient woodland
df_secondary_no_zero_named = df_secondary_no_zero.loc[secondary_no_zero_names >= 1]
## B- Maybe ancient woodland
df_secondary_no_zero_not_named = df_secondary_no_zero.loc[secondary_no_zero_names == 0]

## C - Maybe ancient woodland
df_secondary_one_zero = df_secondary.loc[secondary_zero_count == 1]
## B - Not ancient woodland
df_secondary_plural_zero = df_secondary.loc[secondary_zero_count > 1]


In [149]:
# Bar chart of the per-parcel "no woodland" source counts, grouped by P3_Status.
# Title and axis labels are set so the figure can be read on its own.
fig = px.bar(
    df,
    x='P3_Status',
    y='zero_combined',
    title='Sources showing no woodland, by P3_Status',
    labels={
        'P3_Status': 'P3 status',
        'zero_combined': 'Sources showing no woodland',
    },
)
# NOTE: rendering inside the notebook needs the nbformat package (>=4.2.0) installed
# in the kernel environment; without it fig.show() raises ValueError.
fig.show()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [146]:
# Show all column labels of df: the original fields plus the derived present_/zero_ flag columns
df.columns

Index(['NAME_OS', 'NAME_EP1', 'P3_UID', 'P3_IGNORE', 'C21_AP', 'EPOCH_2',
       'EPOCH_1', 'C20_AP', 'C20_AP_REF', 'TITHE_REF', 'C19_EM', 'C19_EM_REF',
       'OSD', 'OSD_REF', 'CM_1773', 'C18_EM', 'C18_EM_REF', 'C17_EM',
       'C17_EM_REF', 'EM', 'EM_REF', 'OTHER', 'OTHER_REF', 'NAME_TITHE',
       'NAME_OSD', 'NAME_EM', 'TITHE_REF_', 'TITHE_LAND', 'TITHE_PLOT',
       'TITHE_PL_1', 'COMMENTS', 'url', 'OSNAMES_UR', 'CHECK_NAME',
       'TO_CHECK_N', 'NOTES', 'LISTED_BUI', 'LISTED_PAR', 'P2_CLASS',
       'P3_CLASS', 'P3_Status', 'TITHE', 'GlobalID_1', 'EARLIEST_E', 'RESERVE',
       'FORESTRY', 'SURVEY_PRI', 'ACTION', 'SITE_ID', 'AreaHa', 'NAME',
       'Shape_Leng', 'OBJECTID_1', 'HE_REF', 'Shape_Area_1', 'Shape_Length',
       'Shape_Area', 'present_TITHE', 'zero_TITHE', 'present_C19_EM',
       'zero_C19_EM', 'present_OSD', 'zero_OSD', 'present_CM_1773',
       'zero_CM_1773', 'present_C18_EM', 'zero_C18_EM', 'present_C17_EM',
       'zero_C17_EM', 'present_EM', 'zero_EM', 'prese