# Import Libraries

In [1]:
# Load libraries
import numpy as np
from matplotlib import pyplot
from pandas import read_csv 
from pandas import set_option
# Raise pandas' display limits to 500 rows and 500 columns so that
# frames shown in later cells are not truncated.
for _display_limit in ('display.max_rows', 'display.max_columns'):
    set_option(_display_limit, 500)
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pyodbc
from pandas.plotting import scatter_matrix
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Connect to SQL Server

In [2]:
# List the ODBC drivers visible to pyodbc, to check that the driver named in
# the connection string is installed on this machine.
pyodbc.drivers()

['SQL Server', 'ODBC Driver 17 for SQL Server']

In [3]:
# Open a connection to the LMS database on the local SQL Server Express
# instance, authenticating with the current Windows account
# (Trusted_Connection=yes).
#
# The Server part is a raw string: the backslash separating host and instance
# name must reach the driver literally. In a normal string literal "\S" is an
# invalid escape sequence, which Python only tolerates with a warning.
connection = pyodbc.connect(
    "Driver={ODBC Driver 17 for SQL Server};"
    r"Server=DESKTOP-ROTIMI\SQLEXPRESS;"
    "Database=LMS;"
    "Trusted_Connection=yes;"
)

# Load Dataset

#### Extracting data from SQL Server

In [4]:
# Load every row of the LMS_FINE_DETAILS table from SQL Server into a DataFrame
Fine_Details = pd.read_sql(
    sql="Select * from [dbo].[LMS_FINE_DETAILS]",
    con=connection,
)


# Fine_Details Table

#### View Data

In [5]:
# Preview the first six rows of the table
Fine_Details.head(n=6)

Unnamed: 0,FINE_RANGE,FINE_AMOUNT,MAX_DAYS_DELAYED
0,R1,20.0,5
1,R2,50.0,10
2,R3,75.0,15
3,R4,100.0,20
4,R5,150.0,25
5,R6,200.0,90


#### Shape of the data

In [6]:
# Dimensions of the table, printed as (rows, columns)
print(f"{Fine_Details.shape}")


(6, 3)


In [7]:
# Basic info: index range, column names, non-null counts, dtypes and memory usage.
# DataFrame.info() prints this report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line to the output.
Fine_Details.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   FINE_RANGE        6 non-null      object 
 1   FINE_AMOUNT       6 non-null      float64
 2   MAX_DAYS_DELAYED  6 non-null      int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 276.0+ bytes
None


#### Data types of the columns

In [8]:
# Data type of every column
column_dtypes = Fine_Details.dtypes
print(column_dtypes)


FINE_RANGE           object
FINE_AMOUNT         float64
MAX_DAYS_DELAYED      int64
dtype: object


#### Summary statistics (numeric columns)

In [9]:
# Summary statistics for the numeric columns:
# count, mean, standard deviation, min, quartiles and max
Fine_Details.describe()


Unnamed: 0,FINE_AMOUNT,MAX_DAYS_DELAYED
count,6.0,6.0
mean,99.166667,27.5
std,66.363896,31.424513
min,20.0,5.0
25%,56.25,11.25
50%,87.5,17.5
75%,137.5,23.75
max,200.0,90.0


#### Summary statistics (categorical columns)

In [10]:
# Summary of the object-dtype (text) columns: count, unique values, top value, frequency
Fine_Details.describe(include=['object'])


Unnamed: 0,FINE_RANGE
count,6
unique,6
top,R1
freq,1


#### Check for missing entries

In [11]:
# Missing-value report: nulls per column, as a count and as a percentage of rows
null_counts = Fine_Details.isnull().sum()
row_count = len(Fine_Details)

# Both series are ordered from most to fewest missing values
total = null_counts.sort_values(ascending=False)
percent = (null_counts * 100 / row_count).sort_values(ascending=False)

# Place the two series side by side, aligned on column name
missing_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_data.head(20)

Unnamed: 0,Total,Percent
FINE_RANGE,0,0.0
FINE_AMOUNT,0,0.0
MAX_DAYS_DELAYED,0,0.0


#### Check for duplicate data

In [12]:
# Detect duplicated rows.
# A row is flagged when it repeats an earlier row; the first occurrence is not flagged.
dups = Fine_Details.duplicated(keep='first')

# Prints True when at least one row is flagged, otherwise False
print(dups.any())


False


In [13]:
# Re-check the column data types before starting the analysis
column_dtypes = Fine_Details.dtypes
print(column_dtypes)



FINE_RANGE           object
FINE_AMOUNT         float64
MAX_DAYS_DELAYED      int64
dtype: object


# Analysis

In [14]:
# Number of rows for each FINE_RANGE value
Fine_Details['FINE_RANGE'].value_counts()

FINE_RANGE
R1    1
R2    1
R3    1
R4    1
R5    1
R6    1
Name: count, dtype: int64

In [15]:
# Number of rows for each FINE_AMOUNT value
Fine_Details['FINE_AMOUNT'].value_counts()

FINE_AMOUNT
20.0     1
50.0     1
75.0     1
100.0    1
150.0    1
200.0    1
Name: count, dtype: int64

In [16]:
# Number of rows for each MAX_DAYS_DELAYED value
Fine_Details['MAX_DAYS_DELAYED'].value_counts()

MAX_DAYS_DELAYED
5     1
10    1
15    1
20    1
25    1
90    1
Name: count, dtype: int64

In [17]:
# Save the table as Fine_Details.csv in the working directory, omitting the index column
Fine_Details.to_csv(path_or_buf="Fine_Details.csv", index=False)

## 💬 Contact

Feel free to connect with me on:
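- GitHub: [https://github.com/rotimi2020](https://github.com/rotimi2020)
- LinkedIn: [https://www.linkedin.com/in/rotimi-sheriff-omosewo-939a806b/](https://www.linkedin.com/in/rotimi-sheriff-omosewo-939a806b/)
- Email: [omoseworotimi@gmail.com](mailto:omoseworotimi@gmail.com)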


