# Import Libraries

In [1]:
# Load libraries
import numpy as np
from matplotlib import pyplot
from pandas import read_csv 
from pandas import set_option
set_option('display.max_rows', 500)
set_option('display.max_columns', 500)
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pyodbc
from pandas.plotting import scatter_matrix
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Connect to SQL Server

In [2]:
pyodbc.drivers()

['SQL Server', 'ODBC Driver 17 for SQL Server']

In [3]:
connection = pyodbc.connect(
"Driver={ODBC Driver 17 for SQL Server};"
"Server=DESKTOP-ROTIMI\SQLEXPRESS;"
"Database=LMS;"
"Trusted_Connection=yes;")

# load dataset

#### Extracting data from SQL Server

In [4]:
# View data from SQL Server
Lms_Members = pd.read_sql("Select * from [dbo].[LMS_MEMBERS]",connection)

# Lms_Members Table

#### View Data

In [5]:
# head
Lms_Members.head()

Unnamed: 0,MEMBER_ID,MEMBER_NAME,CITY,DATE_REGISTER,DATE_EXPIRE,MEMBERSHIP_STATUS
0,LM001,Akshay,CHENNAI,2018-06-12,2020-10-26,Temporary
1,LM002,Amruta,PUNE,2020-03-02,2020-06-03,Temporary
2,LM003,Ashish,CHENNAI,2018-06-12,2020-08-17,Permanent
3,LM004,Bhakti,CHENNAI,2018-06-12,2020-05-12,Temporary
4,LM005,Gautam,BANGALORE,2018-06-12,2020-11-06,Temporary


#### shape of the data

In [6]:
# shape
print(Lms_Members.shape)


(25, 6)


In [7]:
#Basic info
print(Lms_Members.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   MEMBER_ID          25 non-null     object
 1   MEMBER_NAME        25 non-null     object
 2   CITY               25 non-null     object
 3   DATE_REGISTER      25 non-null     object
 4   DATE_EXPIRE        25 non-null     object
 5   MEMBERSHIP_STATUS  25 non-null     object
dtypes: object(6)
memory usage: 1.3+ KB
None


#### types of the data column

In [8]:
# types
print(Lms_Members.dtypes)


MEMBER_ID            object
MEMBER_NAME          object
CITY                 object
DATE_REGISTER        object
DATE_EXPIRE          object
MEMBERSHIP_STATUS    object
dtype: object


#### data descriptions

In [9]:
# Summary statistics (numerical columns)
Lms_Members.describe()


Unnamed: 0,MEMBER_ID,MEMBER_NAME,CITY,DATE_REGISTER,DATE_EXPIRE,MEMBERSHIP_STATUS
count,25,25,25,25,25,25
unique,25,25,8,8,10,2
top,LM001,Akshay,CHENNAI,2018-06-12,2020-10-26,Permanent
freq,1,1,8,13,4,13


#### Check for missing entries

In [10]:
# Check for missing entries
total = Lms_Members.isnull().sum().sort_values(ascending=False)
percent=(Lms_Members.isnull().sum()*100/len(Lms_Members)).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1,keys=['Total', 'Percent'])
missing_data.head(20)

Unnamed: 0,Total,Percent
MEMBER_ID,0,0.0
MEMBER_NAME,0,0.0
CITY,0,0.0
DATE_REGISTER,0,0.0
DATE_EXPIRE,0,0.0
MEMBERSHIP_STATUS,0,0.0


#### Check for duplicate data

In [11]:
# locate rows of duplicate data

# calculate duplicates
dups = Lms_Members.duplicated()
# report if there are any duplicates
print(dups.any())


False


In [12]:
# Convert to datetime
Lms_Members['DATE_REGISTER'] = pd.to_datetime(Lms_Members['DATE_REGISTER'])
Lms_Members['DATE_EXPIRE'] = pd.to_datetime(Lms_Members['DATE_EXPIRE'])

In [13]:
Lms_Members.head()

Unnamed: 0,MEMBER_ID,MEMBER_NAME,CITY,DATE_REGISTER,DATE_EXPIRE,MEMBERSHIP_STATUS
0,LM001,Akshay,CHENNAI,2018-06-12,2020-10-26,Temporary
1,LM002,Amruta,PUNE,2020-03-02,2020-06-03,Temporary
2,LM003,Ashish,CHENNAI,2018-06-12,2020-08-17,Permanent
3,LM004,Bhakti,CHENNAI,2018-06-12,2020-05-12,Temporary
4,LM005,Gautam,BANGALORE,2018-06-12,2020-11-06,Temporary


# Analysis

In [14]:
# Top City
Lms_Members['CITY'].value_counts()

CITY
CHENNAI      8
Bangalore    3
Delhi        3
Kerala       3
PUNE         2
BANGALORE    2
Pune         2
Punjab       2
Name: count, dtype: int64

In [15]:
# MEMBERSHIP_STATUS Type
Lms_Members['MEMBERSHIP_STATUS'].value_counts()

MEMBERSHIP_STATUS
Permanent    13
Temporary    12
Name: count, dtype: int64

In [18]:
# Export Lms_Members data to CSV without index
Lms_Members.to_csv("Lms_Members.csv",index =False)

## 💬 Contact

Feel free to connect with me on:
- GitHub: [https://github.com/rotimi2020]
- LinkedIn: [https://www.linkedin.com/in/rotimi-sheriff-omosewo-939a806b/]
- Email: [omoseworotimi@gmail.com]


