In [34]:
import pandas as pd
import numpy as np

In [35]:
import matplotlib.pyplot as plt

In [36]:
import seaborn as sns

In [9]:
from data_wrangling import read_cancer_data

In [10]:
# Load the cancer dataset through the project-local helper imported from data_wrangling.
# NOTE(review): later cells use the result like a pandas DataFrame (.head(), ['ORG CODE']);
# the helper's source location and arguments are not visible here — confirm in data_wrangling.
data = read_cancer_data()

In [54]:
import pandas as pd

def select_month(df, month_str):
    """
    Return the rows of ``df`` whose ``MONTH`` column matches a given month.

    The month is matched on its three-letter abbreviation: only the first
    three characters of ``month_str`` are used and case is ignored, so
    'mar', 'MAR' and 'March' all select the rows labelled 'MAR'.

    Parameters:
    - df (pandas.DataFrame): Data with a ``MONTH`` column of upper-case
      three-letter month abbreviations.
    - month_str (str): Month name or abbreviation (e.g., 'JAN' for January).

    Returns:
    - pandas.DataFrame: The rows of ``df`` for the requested month (empty if
      the month is valid but absent from the data).

    Raises:
    - ValueError: If the first three characters of ``month_str`` are not a
      recognised month abbreviation.

    Example:
    >>> data = pd.DataFrame({'MONTH': ['JAN', 'FEB', 'MAR', 'APR', 'MAY'],
    ...                      'Value': [10, 15, 20, 25, 30]})
    >>> selected_data = select_month(data, 'mar')
    >>> print(selected_data)
      MONTH  Value
    2   MAR     20
    """
    # Recognised abbreviations, listed April-to-March.
    valid_months = (
        'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP',
        'OCT', 'NOV', 'DEC', 'JAN', 'FEB', 'MAR',
    )

    # Normalise the request: keep the leading three characters, upper-cased.
    month_key = month_str[:3].upper()

    # Guard clause: reject anything that is not a known month.
    if month_key not in valid_months:
        raise ValueError("Invalid month abbreviation. Please enter a valid three-letter month abbreviation.")

    # Boolean-mask selection of the matching rows.
    return df.loc[df.MONTH == month_key]


In [None]:
def nhs_code_link(link_path=""):
    """Read the link file between the 'ORG CODE' and NHS Trust name.

    Based on NHS Digital data provided here:
    https://odsdatapoint.digital.nhs.uk/predefined

    Parameters:
    - link_path (str or path-like): Location of the CSV link file. There is
      no built-in default location, so a path must be supplied.
      NOTE(review): the column layout of the link file is not defined in this
      notebook — confirm it before merging on 'ORG CODE'.

    Returns:
    - pandas.DataFrame: The contents of the link file as parsed by
      ``pandas.read_csv``.

    Raises:
    - FileNotFoundError: If no path is given or the file does not exist.
    """
    # An empty path can never be read; fail with an explicit message instead
    # of the opaque "No such file or directory: ''" from pandas.
    if not link_path:
        raise FileNotFoundError(
            "No link file path given. Pass the path to the ORG CODE / NHS Trust name CSV."
        )

    # Return the frame so callers can actually use the lookup table.
    return pd.read_csv(link_path)

In [31]:
# Preview the first rows to check the column layout and value formats.
data.head()

Unnamed: 0,PERIOD,YEAR,MONTH,STANDARD,ORG CODE,STAGE/ROUTE,TREATMENT MODALITY,CANCER TYPE,TOTAL,WITHIN STANDARD,BREACHES
0,2022-04-01,2022/23,APR,28-day FDS,R1K,"BREAST SYMPTOMATIC, CANCER NOT SUSPECTED",,Exhibited (non-cancer) breast symptoms - cance...,262.0,255.0,7.0
1,2022-04-01,2022/23,APR,28-day FDS,R0A,"BREAST SYMPTOMATIC, CANCER NOT SUSPECTED",,Exhibited (non-cancer) breast symptoms - cance...,382.0,144.0,238.0
2,2022-04-01,2022/23,APR,28-day FDS,R0D,"BREAST SYMPTOMATIC, CANCER NOT SUSPECTED",,Exhibited (non-cancer) breast symptoms - cance...,3.0,2.0,1.0
3,2022-04-01,2022/23,APR,28-day FDS,R1F,"BREAST SYMPTOMATIC, CANCER NOT SUSPECTED",,Exhibited (non-cancer) breast symptoms - cance...,66.0,63.0,3.0
4,2022-04-01,2022/23,APR,28-day FDS,RAS,"BREAST SYMPTOMATIC, CANCER NOT SUSPECTED",,Exhibited (non-cancer) breast symptoms - cance...,81.0,78.0,3.0


PERIOD                                              2022-04-01 00:00:00
YEAR                                                            2022/23
MONTH                                                               APR
STANDARD                                                     28-day FDS
ORG CODE                                                            R1K
STAGE/ROUTE                    BREAST SYMPTOMATIC, CANCER NOT SUSPECTED
TREATMENT MODALITY                                                  NaN
CANCER TYPE           Exhibited (non-cancer) breast symptoms - cance...
TOTAL                                                             262.0
WITHIN STANDARD                                                   255.0
BREACHES                                                            7.0
Name: 0, dtype: object

In [53]:
# Disabled experiment: plots TOTAL against PERIOD for the first row only.
# plt.plot(data.iloc[0].PERIOD, data.iloc[0].TOTAL)
# List the distinct organisation codes present in the 'ORG CODE' column.
data['ORG CODE'].unique()

array(['R1K', 'R0A', 'R0D', 'R1F', 'RAS', 'RA9', 'RAE', 'RCF', 'RA2',
       'R1H', 'RCB', 'RA4', 'RJL', 'RAL', 'RCD', 'RD1', 'RAX', 'RAP',
       'RFR', 'RAJ', 'RK9', 'RBN', 'RGN', 'RDE', 'RBD', 'REM', 'RJZ',
       'RBK', 'RM1', 'RC9', 'RGP', 'REF', 'RCX', 'RF4', 'RP5', 'RBT',
       'RMC', 'RPY', 'RH8', 'RFS', 'RGT', 'RFF', 'RTR', 'RD8', 'RMP',
       'RWE', 'RJ2', 'RGR', 'RH5', 'RJ6', 'RVV', 'RDU', 'RRF', 'RWG',
       'RJE', 'RJ7', 'RHW', 'RJC', 'RWP', 'RHM', 'RTD', 'RXL', 'RLT',
       'RK5', 'RJ1', 'RKB', 'RNS', 'RHQ', 'RTE', 'RN5', 'RQW', 'RKE',
       'RJN', 'RNA', 'RTG', 'RHU', 'RWA', 'RNZ', 'RTK', 'RL4', 'RN7',
       'RR7', 'RVW', 'RLQ', 'RWD', 'RTH', 'RWW', 'RNQ', 'RNN', 'RTP',
       'RWY', 'RN3', 'RX1', 'RTX', 'RXC', 'RQX', 'RVJ', 'RXR', 'RXF',
       'RPA', 'RXN', 'RRV', 'RYJ', 'RQM', 'RTF', 'RR8', 'RXK', 'RRK',
       'RXP', 'RWF', 'RXW', 'RWH', 'RYR', 'REP', 'RBL', 'RQ3', 'RVR',
       'RM3', 'RVY', 'NV1', 'RXQ', 'RA7', 'RJR', 'R0B', 'RWJ', 'RET',
       'RP6', 'NYG',

In [None]:
import pandas as pd

def select_area(df, org_str):
    """
    Selects and returns a subset of a DataFrame based on the specified organisation code.

    The code is matched against the 'ORG CODE' column after stripping
    surrounding whitespace and converting to upper case, so 'r1k' and ' R1K '
    both select rows labelled 'R1K'. This assumes codes are stored in upper
    case, as they are in the cancer data displayed in this notebook.

    Parameters:
    - df (pandas.DataFrame): The DataFrame containing the data; must have an
      'ORG CODE' column.
    - org_str (str): The organisation code to select (e.g., 'R1K').

    Returns:
    - pandas.DataFrame: A subset of the input DataFrame containing only rows
      corresponding to the specified organisation code.

    Raises:
    - ValueError: If the specified organisation code does not occur in the
      'ORG CODE' column of ``df``.
    - KeyError: If ``df`` has no 'ORG CODE' column.

    Example:
    >>> data = pd.DataFrame({'ORG CODE': ['R1K', 'R0A', 'R1K'],
    ...                      'TOTAL': [262.0, 382.0, 3.0]})
    >>> selected_data = select_area(data, 'r0a')
    >>> selected_data['TOTAL'].tolist()
    [382.0]
    """
    # Normalise the requested code so matching ignores case and stray spaces.
    org_code = str(org_str).strip().upper()

    # The set of valid codes is whatever the data contains, so validate
    # against the column itself rather than a hard-coded list.
    is_requested_org = df['ORG CODE'] == org_code

    if not is_requested_org.any():
        # Raise an error for a code that is not present in the data
        raise ValueError(
            f"Invalid organisation code {org_code!r}. "
            "Please enter a code present in the 'ORG CODE' column."
        )

    # Select rows corresponding to the specified organisation
    return df.loc[is_requested_org]