## **Import Libraries and data**

In [1]:
import numpy as np
import pandas as pd

## **Task: Transform the data for analysis.**
- Techniques used:
    1. `Extract numeric values` from a column that mixes numbers and text
    2. `Extract text values` from a column that mixes numbers and text

In [2]:
# Load the raw medicine workbook into a DataFrame.
# sheet_name=0 spells out the default: only the first worksheet is read.
data = pd.read_excel(
    "Medicine-Data-with-lumped-Quantity-and-Measure.xlsx",
    sheet_name=0,
)
data

Unnamed: 0,Description,Quantity
0,lotion Benzylbenzoate lotion,0Bottle
1,Methylated spirit 100ml,0Bottle
2,susp Magnessium Trisilicate 200ml,0Bottle
3,Susp. Amoxicillin 125mg/5ml,0Bottle
4,Susp. Erythromycin 125mg/5ml,0Bottle
...,...,...
2274,Syp Ascorbic acid,20Bottle
2275,syr Cough Syrup (P) 100ml,20Bottle
2276,syr Cough Syrup (A) 100ml,10Bottle
2277,Cotton Wool 100g,1Pcs


### **Solution:**
- The `Quantity` column holds both a number and a unit in each value (e.g. `20Bottle`).
- The task is to separate them into `Quantity Value` and `Measure` columns.

In [3]:
# Split the lumped 'Quantity' strings (e.g. "20Bottle") into a number and a unit.
# expand=False makes str.extract return a Series instead of a one-column DataFrame.

# First run of digits -> numeric quantity.
# pd.to_numeric + nullable 'Int64' keeps rows where no digits were found as <NA>;
# the previous .astype('int') raises as soon as a single value fails to match.
data['Quantity Value'] = pd.to_numeric(
    data['Quantity'].str.extract(r'(\d+)', expand=False)
).astype('Int64')

# First run of ASCII letters -> unit of measure (NaN when no letters are found).
data['Measure'] = data['Quantity'].str.extract(r'([A-Za-z]+)', expand=False)

data

Unnamed: 0,Description,Quantity,Quantity Value,Measure
0,lotion Benzylbenzoate lotion,0Bottle,0,Bottle
1,Methylated spirit 100ml,0Bottle,0,Bottle
2,susp Magnessium Trisilicate 200ml,0Bottle,0,Bottle
3,Susp. Amoxicillin 125mg/5ml,0Bottle,0,Bottle
4,Susp. Erythromycin 125mg/5ml,0Bottle,0,Bottle
...,...,...,...,...
2274,Syp Ascorbic acid,20Bottle,20,Bottle
2275,syr Cough Syrup (P) 100ml,20Bottle,20,Bottle
2276,syr Cough Syrup (A) 100ml,10Bottle,10,Bottle
2277,Cotton Wool 100g,1Pcs,1,Pcs


In [4]:
# Drop the original lumped 'Quantity' column.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second run is a no-op, not a KeyError.
data = data.drop(columns='Quantity', errors='ignore')

data

Unnamed: 0,Description,Quantity Value,Measure
0,lotion Benzylbenzoate lotion,0,Bottle
1,Methylated spirit 100ml,0,Bottle
2,susp Magnessium Trisilicate 200ml,0,Bottle
3,Susp. Amoxicillin 125mg/5ml,0,Bottle
4,Susp. Erythromycin 125mg/5ml,0,Bottle
...,...,...,...
2274,Syp Ascorbic acid,20,Bottle
2275,syr Cough Syrup (P) 100ml,20,Bottle
2276,syr Cough Syrup (A) 100ml,10,Bottle
2277,Cotton Wool 100g,1,Pcs


In [5]:
# Save the transformed data as an extra sheet in the original workbook.
# mode='a' appends to the existing file so its current sheets are kept.
# if_sheet_exists='replace' (pandas >= 1.3) overwrites a 'Transformed_data'
# sheet left by an earlier run; the default ('error') raises ValueError on re-run.
with pd.ExcelWriter(
    "Medicine-Data-with-lumped-Quantity-and-Measure.xlsx",
    engine="openpyxl",
    mode='a',
    if_sheet_exists='replace',
) as writer:
    data.to_excel(writer, sheet_name="Transformed_data", index=False)