# Import Libraries

In [1]:
# Load libraries
import numpy as np
from matplotlib import pyplot
from pandas import read_csv 
from pandas import set_option
set_option('display.max_rows', 500)
set_option('display.max_columns', 500)
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pyodbc
from pandas.plotting import scatter_matrix
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Connect to SQL Server

In [2]:
# List the ODBC drivers installed on this machine; the Driver= value in the
# connection string must match one of the names shown here.
pyodbc.drivers()

['SQL Server', 'ODBC Driver 17 for SQL Server']

In [3]:
# Open an ODBC connection to the LMS database using Windows authentication
# (Trusted_Connection=yes), so no credentials are stored in the notebook.
# The server name contains a backslash, so that fragment is a raw string:
# in a regular string "\S" is an invalid escape sequence, which Python
# reports as a DeprecationWarning/SyntaxWarning. The resulting connection
# string is byte-for-byte the same as before.
connection = pyodbc.connect(
    "Driver={ODBC Driver 17 for SQL Server};"
    r"Server=DESKTOP-ROTIMI\SQLEXPRESS;"
    "Database=LMS;"
    "Trusted_Connection=yes;"
)

# Load dataset

#### Extracting data from SQL Server

In [4]:
# Read every row of the supplier table from SQL Server into a DataFrame
Suppliers_Details = pd.read_sql(
    sql="Select * from [dbo].[LMS_SUPPLIERS_DETAILS]",
    con=connection,
)

# Suppliers_Details Table

#### View Data

In [5]:
# Preview the first five rows
Suppliers_Details.head(n=5)

Unnamed: 0,SUPPLIER_ID,SUPPLIER_NAME,ADDRESS,CONTACT,EMAIL
0,S01,SINGAPORE SHOPPEE,CHENNAI,9894123555,sing@gmail.com
1,S02,JK Stores,MUMBAI,9940123450,jks@yahoo.com
2,S03,ROSE BOOK STORE,TRIVANDRUM,9444411222,rose@gmail.com
3,S04,KAVARI STORE,DELHI,8630001452,kavi@redif.com
4,S05,EINSTEN BOOK GALLARY,US,9542000001,eingal@aol.com


#### Shape of the data

In [6]:
# Dimensions of the table as (number of rows, number of columns)
print((len(Suppliers_Details.index), len(Suppliers_Details.columns)))


(12, 5)


In [7]:
# Basic info: column names, non-null counts, dtypes and memory usage.
# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called on its own; wrapping it in print() only added a stray
# "None" line after the report.
Suppliers_Details.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   SUPPLIER_ID    12 non-null     object
 1   SUPPLIER_NAME  12 non-null     object
 2   ADDRESS        12 non-null     object
 3   CONTACT        12 non-null     int64 
 4   EMAIL          12 non-null     object
dtypes: int64(1), object(4)
memory usage: 612.0+ bytes
None


#### Data types of the columns

In [8]:
# Print the dtype pandas assigned to each column
print(Suppliers_Details.dtypes)


SUPPLIER_ID      object
SUPPLIER_NAME    object
ADDRESS          object
CONTACT           int64
EMAIL            object
dtype: object


#### Summary statistics (numerical columns)

In [9]:
# Summary statistics (numerical columns)
# NOTE(review): the only numeric column is CONTACT, which appears to hold
# phone numbers — presumably identifiers rather than quantities, so the
# mean/std/quartiles are likely not meaningful. Confirm, and consider
# casting CONTACT to str.
Suppliers_Details.describe()


Unnamed: 0,CONTACT
count,12.0
mean,9219417000.0
std,778369800.0
min,7855623000.0
25%,8640501000.0
50%,9493756000.0
75%,9895349000.0
max,9940123000.0


#### Summary statistics (categorical columns)

In [10]:
# Count, number of unique values, most frequent value and its frequency
# for the object (text) columns
Suppliers_Details.describe(include=['object'])


Unnamed: 0,SUPPLIER_ID,SUPPLIER_NAME,ADDRESS,EMAIL
count,12,12,12,12
unique,12,12,5,12
top,S01,SINGAPORE SHOPPEE,MUMBAI,sing@gmail.com
freq,1,1,4,1


#### Check for missing entries

In [11]:
# Missing-value report: absolute count and percentage of nulls per column,
# each sorted from most to least missing.
null_counts = Suppliers_Details.isnull().sum()
total = null_counts.sort_values(ascending=False)
percent = (null_counts * 100 / len(Suppliers_Details)).sort_values(ascending=False)
missing_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_data.head(20)

Unnamed: 0,Total,Percent
SUPPLIER_ID,0,0.0
SUPPLIER_NAME,0,0.0
ADDRESS,0,0.0
CONTACT,0,0.0
EMAIL,0,0.0


#### Check for duplicate data

In [12]:
# Flag rows that exactly repeat an earlier row (the first occurrence is not flagged)
dups = Suppliers_Details.duplicated(keep='first')
# True if at least one duplicate row exists, otherwise False
print(dups.any())


False


# Analysis

In [13]:
# Quick look at the first five rows before the analysis
Suppliers_Details.head(n=5)

Unnamed: 0,SUPPLIER_ID,SUPPLIER_NAME,ADDRESS,CONTACT,EMAIL
0,S01,SINGAPORE SHOPPEE,CHENNAI,9894123555,sing@gmail.com
1,S02,JK Stores,MUMBAI,9940123450,jks@yahoo.com
2,S03,ROSE BOOK STORE,TRIVANDRUM,9444411222,rose@gmail.com
3,S04,KAVARI STORE,DELHI,8630001452,kavi@redif.com
4,S05,EINSTEN BOOK GALLARY,US,9542000001,eingal@aol.com


In [14]:
# Number of rows per SUPPLIER_ID (a count of 1 for every ID means the IDs are unique)
Suppliers_Details['SUPPLIER_ID'].value_counts()

SUPPLIER_ID
S01    1
S02    1
S03    1
S04    1
S05    1
S06    1
S07    1
S08    1
S09    1
S10    1
S11    1
S12    1
Name: count, dtype: int64

In [15]:
# Number of suppliers per ADDRESS value, most common first
Suppliers_Details['ADDRESS'].value_counts()

ADDRESS
MUMBAI        4
CHENNAI       2
TRIVANDRUM    2
DELHI         2
US            2
Name: count, dtype: int64

In [18]:
# Write the supplier table to a CSV file in the working directory,
# leaving out the DataFrame index column
Suppliers_Details.to_csv(
    path_or_buf="Suppliers_Details.csv",
    index=False,
)

## 💬 Contact

Feel free to connect with me on:
- GitHub: <https://github.com/rotimi2020>
- LinkedIn: <https://www.linkedin.com/in/rotimi-sheriff-omosewo-939a806b/>
- Email: <omoseworotimi@gmail.com>


