In [1]:
# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly
from ydata_profiling import  ProfileReport
import PyQt5 as qt
from IPython.display import display, Markdown
# Render matplotlib figures inline in the Jupyter notebook.
# The PyQt5 import above requires the package to be installed (pip install PyQt5).
get_ipython().run_line_magic('matplotlib', 'inline')
# Last expression of the cell: shows which matplotlib backend is active.
matplotlib.get_backend()

'module://matplotlib_inline.backend_inline'

In [3]:
# Load Data

In [4]:
# Read the maintenance workbook into `df`, the frame used by the cells below.
df = pd.read_excel('maintenance_cleaned.xlsx')

In [5]:
# Analyze DataSet function

In [6]:
def analyzeDataSet(DataSet,state):
    """Print and display a descriptive overview of ``DataSet``.

    The report contains, in order: the first three rows, the
    ``DataFrame.info()`` summary, rounded ``describe()`` statistics, a
    per-column table of non-null/null counts and dtypes, and a per-column
    table of minimum, maximum, distinct-value count, null count and the
    number of fully duplicated rows in the frame. Both tables are rendered
    as Markdown and also echoed as plain text.

    Parameters
    ----------
    DataSet : pandas.DataFrame
        Frame to summarise. Every statistic is computed from this argument
        (not from any module-level frame).
    state : str
        'pre' or 'post'. Only referenced by the optional, commented-out
        ydata-profiling export at the end of the function, where it selects
        the report file name.

    Returns
    -------
    None
        The function only prints/displays its findings.
    """
    print('Display data')
    print(DataSet.head(3))
    print('Data Info')
    # info() writes its report itself and returns None, so wrapping it in
    # print() would only add a stray "None" line to the output.
    DataSet.info()
    print('Describe Data')
    print(DataSet.describe().round(2))
    print('DataFrame for Information about Dataset')
    null_counts = DataSet.isnull().sum().values
    information_DataSet = pd.DataFrame({"name": DataSet.columns,
                     "non-nulls": len(DataSet) - null_counts,
                     "nulls": null_counts,
                     "type": DataSet.dtypes.values})
    display(Markdown(information_DataSet.to_markdown()))
    print(information_DataSet)

    # Whole-frame duplicate count is the same for every column: compute it once.
    duplicated_rows = DataSet.duplicated().sum()

    # Construct one summary row per column
    info_list = []
    for column in DataSet.columns:
        values = DataSet[column]
        # Min/max are taken over non-null values so missing entries cannot
        # distort (or break) the comparison; an all-null column yields None.
        present = values.dropna()
        has_values = not present.empty
        row = [column,
               present.min() if has_values else None,
               present.max() if has_values else None,
               values.nunique(),
               values.isna().sum(),
               duplicated_rows
              ]
        info_list.append(row)
    print('DataFrame for information about Dataset Values')
    # Convert List to DataFrame
    info_df = pd.DataFrame(data = info_list,
                            columns=['Column_name',
                                     'Minimum_value',
                                     'Maximum_value',
                                     'Number_of_unique_records',
                                     'Number_of_null_records',
                                     'Number_of_duplicated_records'
                                    ])
    display(Markdown(info_df.to_markdown()))
    print(info_df)
    print('show data types')
    print(info_df.dtypes)
    print('Remove comment character if you want to proceed Running Ydata Report')
    #pf = ProfileReport(DataSet)
    #if state == 'pre':
    #    pf.to_file('maintenance_BEFORE_pre_process.html')
    #elif state == 'post':
    #    pf.to_file('maintenance_AFTER_pre_process.html')
    #else :
    #    print('for state of analysis, use "pre" or "post"')

In [7]:
# Data pre processing

In [8]:
## Date Processing

In [9]:
### Extract yearIn, monthIn, monthNIn, dayIn, dayNIn from 'date in' field

In [10]:
# Split the 'date in' timestamp into its calendar components.
# The datetime accessor is fetched once and reused for every derived column.
date_in_parts = df['date in'].dt
df['yearIn'] = date_in_parts.year
df['monthIn'] = date_in_parts.month
df['monthNIn'] = date_in_parts.month_name()
df['dayIn'] = date_in_parts.day
df['dayNIn'] = date_in_parts.day_name()


In [11]:
### Extract yearReady, monthReady, monthNReady, dayReady, dayNReady from 'date ready' field

In [12]:
# Split the 'date ready' timestamp into its calendar components.
# The datetime accessor is fetched once and reused for every derived column.
date_ready_parts = df['date ready'].dt
df['yearReady'] = date_ready_parts.year
df['monthReady'] = date_ready_parts.month
df['monthNReady'] = date_ready_parts.month_name()
df['dayReady'] = date_ready_parts.day
df['dayNReady'] = date_ready_parts.day_name()

In [13]:
### Calculate service duration

In [14]:
# Whole days between 'date in' and 'date ready'; the +1 makes the count
# inclusive, so a car returned on the day it arrived has a duration of 1.
time_in_service = df['date ready'] - df['date in']
df['service_duration'] = time_in_service.dt.days + 1

In [15]:
## Categorization

In [16]:
### Cost Category

In [17]:
# Edges of the half-open cost bins [lower, upper): every pair of consecutive
# edges defines one category, e.g. 1..49 -> '0001:0050', 2000..2999 -> '2000:3000'.
COST_BIN_EDGES = [1, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900,
                  1000, 1500, 2000, 3000]
# Zero-padded 'lower:upper' labels, so the categories also sort correctly as text.
cost_labels = [f'{lower:04d}:{upper:04d}'
               for lower, upper in zip(COST_BIN_EDGES[:-1], COST_BIN_EDGES[1:])]
# Same range -> label mapping as before; kept defined in case other cells read it.
cost_dict = {
    range(lower, upper, 1): label
    for lower, upper, label in zip(COST_BIN_EDGES[:-1], COST_BIN_EDGES[1:], cost_labels)
}
# pd.cut is the documented binning API. Series.replace with range objects as
# dict keys depends on undocumented list-like matching and leaves unmatched
# costs as raw numbers mixed in with the string labels.
# right=False keeps the bins half-open like range(); costs outside every bin
# become NaN. astype(object) keeps the column a plain string (object) column.
df['cost_category'] = pd.cut(
    df['cost'], bins=COST_BIN_EDGES, labels=cost_labels, right=False
).astype(object)

In [18]:
# Analyze DataSet Post processing

In [19]:
# Run the analysis report on the frame after the derived columns were added.
analyzeDataSet(df,'post')

Display data
   # plate number      car    damage type    date in  KMs IN  Fuel in  \
0  1     70-29280   TUCSAN     اصلاح بودي 2015-01-14   23015     0.13   
1  2     70-26587  ELANTRA     اصلاح بودي 2015-01-14   43638     0.38   
2  3     70-25180   AVANZA  اصلاح مكانيك  2015-01-14   39880     0.38   

  date ready  KMs out  KMs Diff  ...  monthNIn  dayIn     dayNIn yearReady  \
0 2015-01-18    23030        15  ...   January     14  Wednesday      2015   
1 2015-01-18    43638         0  ...   January     14  Wednesday      2015   
2 2015-01-24    39889         9  ...   January     14  Wednesday      2015   

  monthReady monthNReady dayReady dayNReady  service_duration  cost_category  
0          1     January       18    Sunday                 5      0150:0200  
1          1     January       18    Sunday                 5      0200:0300  
2          1     January       24  Saturday                11      0050:0100  

[3 rows x 30 columns]
Data Info
<class 'pandas.core.frame.DataFr

|    | name             |   non-nulls |   nulls | type           |
|---:|:-----------------|------------:|--------:|:---------------|
|  0 | #                |        1499 |       0 | int64          |
|  1 | plate number     |        1499 |       0 | object         |
|  2 | car              |        1499 |       0 | object         |
|  3 | damage type      |        1499 |       0 | object         |
|  4 | date in          |        1499 |       0 | datetime64[ns] |
|  5 | KMs IN           |        1499 |       0 | int64          |
|  6 | Fuel in          |        1499 |       0 | float64        |
|  7 | date ready       |        1499 |       0 | datetime64[ns] |
|  8 | KMs out          |        1499 |       0 | int64          |
|  9 | KMs Diff         |        1499 |       0 | int64          |
| 10 | Fuel out         |        1499 |       0 | float64        |
| 11 | Fuel Diff        |        1499 |       0 | float64        |
| 12 | cost             |        1499 |       0 | int64          |
| 13 | location         |        1499 |       0 | object         |
| 14 | corporate        |        1499 |       0 | object         |
| 15 | delivered by     |        1499 |       0 | object         |
| 16 | returned by      |        1499 |       0 | object         |
| 17 | notes            |        1499 |       0 | object         |
| 18 | yearIn           |        1499 |       0 | int64          |
| 19 | monthIn          |        1499 |       0 | int64          |
| 20 | monthNIn         |        1499 |       0 | object         |
| 21 | dayIn            |        1499 |       0 | int64          |
| 22 | dayNIn           |        1499 |       0 | object         |
| 23 | yearReady        |        1499 |       0 | int64          |
| 24 | monthReady       |        1499 |       0 | int64          |
| 25 | monthNReady      |        1499 |       0 | object         |
| 26 | dayReady         |        1499 |       0 | int64          |
| 27 | dayNReady        |        1499 |       0 | object         |
| 28 | service_duration |        1499 |       0 | int64          |
| 29 | cost_category    |        1499 |       0 | object         |

                name  non-nulls  nulls            type
0                  #       1499      0           int64
1       plate number       1499      0          object
2                car       1499      0          object
3        damage type       1499      0          object
4            date in       1499      0  datetime64[ns]
5             KMs IN       1499      0           int64
6            Fuel in       1499      0         float64
7         date ready       1499      0  datetime64[ns]
8            KMs out       1499      0           int64
9           KMs Diff       1499      0           int64
10          Fuel out       1499      0         float64
11         Fuel Diff       1499      0         float64
12              cost       1499      0           int64
13          location       1499      0          object
14         corporate       1499      0          object
15      delivered by       1499      0          object
16       returned by       1499      0          object
17        

|    | Column_name      | Minimum_value                        | Maximum_value       |   Number_of_unique_records |   Number_of_null_records |   Number_of_duplicated_records |
|---:|:-----------------|:-------------------------------------|:--------------------|---------------------------:|-------------------------:|-------------------------------:|
|  0 | #                | 1                                    | 1499                |                       1499 |                        0 |                              0 |
|  1 | plate number     | 70-24337                             | 70-91371            |                        337 |                        0 |                              0 |
|  2 | car              | GUNSAYIL                             | YARIS               |                         25 |                        0 |                              0 |
|  3 | damage type      | اصلاح بودي                           | غيار زيت            |                          7 |                        0 |                              0 |
|  4 | date in          | 2015-01-01 00:00:00                  | 2016-02-03 00:00:00 |                        301 |                        0 |                              0 |
|  5 | KMs IN           | 390                                  | 754935              |                       1423 |                        0 |                              0 |
|  6 | Fuel in          | 0.0                                  | 1.0                 |                         10 |                        0 |                              0 |
|  7 | date ready       | 2015-01-01 00:00:00                  | 2016-02-03 00:00:00 |                        320 |                        0 |                              0 |
|  8 | KMs out          | 400                                  | 754945              |                       1427 |                        0 |                              0 |
|  9 | KMs Diff         | 0                                    | 71                  |                         39 |                        0 |                              0 |
| 10 | Fuel out         | 0.0                                  | 1.0                 |                          9 |                        0 |                              0 |
| 11 | Fuel Diff        | -0.38                                | 0.38                |                         11 |                        0 |                              0 |
| 12 | cost             | 2                                    | 2500                |                        166 |                        0 |                              0 |
| 13 | location         | 4 جيد                                | هانكونك             |                         32 |                        0 |                              0 |
| 14 | corporate        | Aig                                  | مؤجر سنوي           |                         55 |                        0 |                              0 |
| 15 | delivered by     | Abdalla                              | Yousif              |                         30 |                        0 |                              0 |
| 16 | returned by      | Abdalla                              | Yousif              |                         30 |                        0 |                              0 |
| 17 | notes            | غيار زيت +فلتر هواء +فلتر كندشن+بريك | ويل بلنس / ترصيص    |                        666 |                        0 |                              0 |
| 18 | yearIn           | 2015                                 | 2016                |                          2 |                        0 |                              0 |
| 19 | monthIn          | 1                                    | 12                  |                         12 |                        0 |                              0 |
| 20 | monthNIn         | April                                | September           |                         12 |                        0 |                              0 |
| 21 | dayIn            | 1                                    | 31                  |                         31 |                        0 |                              0 |
| 22 | dayNIn           | Friday                               | Wednesday           |                          7 |                        0 |                              0 |
| 23 | yearReady        | 2015                                 | 2016                |                          2 |                        0 |                              0 |
| 24 | monthReady       | 1                                    | 12                  |                         12 |                        0 |                              0 |
| 25 | monthNReady      | April                                | September           |                         12 |                        0 |                              0 |
| 26 | dayReady         | 1                                    | 31                  |                         31 |                        0 |                              0 |
| 27 | dayNReady        | Friday                               | Wednesday           |                          7 |                        0 |                              0 |
| 28 | service_duration | 1                                    | 70                  |                         21 |                        0 |                              0 |
| 29 | cost_category    | 0001:0050                            | 2000:3000           |                         15 |                        0 |                              0 |

         Column_name                                      Minimum_value  \
0                  #                                                  1   
1       plate number                                         70-24337     
2                car                                          GUNSAYIL    
3        damage type                                         اصلاح بودي   
4            date in                                2015-01-01 00:00:00   
5             KMs IN                                                390   
6            Fuel in                                                0.0   
7         date ready                                2015-01-01 00:00:00   
8            KMs out                                                400   
9           KMs Diff                                                  0   
10          Fuel out                                                0.0   
11         Fuel Diff                                              -0.38   
12              cost     

In [20]:
# Save DataSet post processing to new Excel file

In [21]:
# Write the extended frame (original columns plus the derived ones) to a new workbook.
# NOTE(review): to_excel writes the DataFrame index as an extra first column by
# default; pass index=False if that column is not wanted. Confirm with consumers of the file.
df.to_excel('maintenance_cleaned_extended.xlsx')