## Data Preprocessing

In [None]:
select top 10 * from callCenter

id,customer_name,sentiment,csat_score,call_timestamp,reason,city,state,channel,response_time,call_duration_in_minutes,call_center
DKK-57076809-w-055481-fU,Analise Gairdner,Neutral,7.0,2020-10-29,Billing Question,Detroit,Michigan,Call-Center,Within SLA,17,Los Angeles/CA
QGK-72219678-w-102139-KY,Crichton Kidsley,Very Positive,,2020-10-05,Service Outage,Spartanburg,South Carolina,Chatbot,Within SLA,23,Baltimore/MD
GYJ-30025932-A-023015-LD,Averill Brundrett,Negative,,2020-10-04,Billing Question,Gainesville,Florida,Call-Center,Above SLA,45,Los Angeles/CA
ZJI-96807559-i-620008-m7,Noreen Lafflina,Very Negative,1.0,2020-10-17,Billing Question,Portland,Oregon,Chatbot,Within SLA,12,Los Angeles/CA
DDU-69451719-O-176482-Fm,Toma Van der Beken,Very Positive,,2020-10-17,Payments,Fort Wayne,Indiana,Call-Center,Within SLA,23,Los Angeles/CA
JVI-79728660-U-224285-4a,Kaylyn Emlen,Neutral,5.0,2020-10-28,Billing Question,Salt Lake City,Utah,Call-Center,Within SLA,25,Baltimore/MD
AZI-95054097-e-185542-PT,Phillipe Bowring,Neutral,8.0,2020-10-16,Billing Question,Tyler,Texas,Chatbot,Within SLA,31,Baltimore/MD
TWX-27007918-I-608789-Xw,Krysta de Tocqueville,Positive,,2020-10-21,Billing Question,New York City,New York,Chatbot,Below SLA,37,Los Angeles/CA
XNG-44599118-P-344473-ZU,Oran Lifsey,Very Negative,,2020-10-03,Billing Question,Dallas,Texas,Email,Below SLA,37,Baltimore/MD
RLC-64108207-Z-285141-VS,Port Inggall,Neutral,,2020-10-07,Billing Question,Cincinnati,Ohio,Chatbot,Within SLA,12,Baltimore/MD


In [54]:
--Number of column of data 
select count(*) as NumberOfRows
from CallCenter

NumberOfRows
32941


In [55]:
--Number Of Columns 

SELECT 
    COUNT(*) AS NumberOfColumns
FROM 
    INFORMATION_SCHEMA.COLUMNS
WHERE 
    TABLE_NAME = 'callcenter';

NumberOfColumns
12


In [56]:
--Check Null values of data

select 
sum(case when cs.id is null then 1 else 0 end) as 'ID', 
sum(case when cs.call_duration_in_minutes is null then 1 else 0 end) as 'call_duration_in_minutes', 
sum(case when cs.call_timestamp is null then 1 else 0 end) as 'call_timestamp',
sum(case when cs.channel is null then 1 else 0 end) as 'channel',
sum(case when cs.city is null then 1 else 0 end) as 'city',
sum(case when cs.csat_score is null then 1 else 0 end) as 'csat_score',
sum(case when cs.customer_name is null then 1 else 0 end) as 'customer_name',
sum(case when cs.reason is null then 1 else 0 end) as 'reason',
sum(case when cs.response_time is null then 1 else 0 end) as 'response_time',
sum(case when cs.sentiment is null then 1 else 0 end) as 'sentiment',
sum(case when cs.state is null then 1 else 0 end) as 'state'
from CallCenter cs


ID,call_duration_in_minutes,call_timestamp,channel,city,csat_score,customer_name,reason,response_time,sentiment,state
0,0,0,0,0,20670,0,0,0,0,0


In [57]:
--set null values in csat_score = 11 as 11 is not scored by client
update callcenter 
set csat_score = 11
where csat_score is null

In [58]:
select sum(case when csat_score is null then 1 else 0 end) as 'csat_score' 
from CallCenter

csat_score
0


In [59]:
select distinct(city), COUNT(city) as NumberOfCityCalls  from CallCenter
group by city
order by NumberOfCityCalls DESC

city,NumberOfCityCalls
Washington,1110
Houston,657
New York City,564
El Paso,528
Dallas,437
Atlanta,416
Miami,374
Sacramento,341
Los Angeles,331
Kansas City,327


In [60]:
-- Columns DataTypes
DECLARE @TableName NVARCHAR(128) = 'CallCenter';

SELECT 
    COLUMN_NAME,
    DATA_TYPE
FROM 
    INFORMATION_SCHEMA.COLUMNS
WHERE 
    TABLE_NAME = @TableName;


COLUMN_NAME,DATA_TYPE
id,nvarchar
customer_name,nvarchar
sentiment,nvarchar
csat_score,tinyint
call_timestamp,date
reason,nvarchar
city,nvarchar
state,nvarchar
channel,nvarchar
response_time,nvarchar


##  Data Analysis



#### Number of Calls

In [61]:
select channel, count(channel) as 'Number of Calls'
from CallCenter
group by channel
order by 2

channel,Number of Calls
Web,6576
Email,7470
Chatbot,8256
Call-Center,10639


#### Number of Customers called Each Center

In [62]:
select call_center, count(call_center) as 'Number of Customers called the Center'
from CallCenter
group by call_center
order by 2 desc

call_center,Number of Customers called the Center
Los Angeles/CA,13734
Baltimore/MD,11012
Chicago/IL,5419
Denver/CO,2776


#### Number of Customers called the Center

In [63]:
select count(id) as 'Number of Customers called the Center' from CallCenter

/*
there is no duplicated in id column
*/

Number of Customers called the Center
32941


#### Number of Calls for each Reasons 

In [64]:
select (cs.reason) as 'Reason', count(cs.reason) as 'Number Of Calls'
from CallCenter cs
GROUP by reason
order by 2 desc

Reason,Number Of Calls
Billing Question,23462
Payments,4749
Service Outage,4730


#### Number of Calls for Each sentiment 

In [65]:
select (cs.sentiment), COUNT(cs.sentiment) as  'Number Of Calls'
from CallCenter cs
group by cs.sentiment
order by 2

sentiment,Number Of Calls
Very Positive,3170
Positive,3928
Very Negative,6026
Neutral,8754
Negative,11063


In [66]:
select top 1 * from CallCenter

id,customer_name,sentiment,csat_score,call_timestamp,reason,city,state,channel,response_time,call_duration_in_minutes,call_center
DKK-57076809-w-055481-fU,Analise Gairdner,Neutral,7,2020-10-29,Billing Question,Detroit,Michigan,Call-Center,Within SLA,17,Los Angeles/CA


### Cities with Number of Calls

In [67]:
select top 10(cs.city), COUNT(cs.city) as  'Number Of Calls'
from CallCenter cs
group by cs.city
order by 2 DESC

city,Number Of Calls
Washington,1110
Houston,657
New York City,564
El Paso,528
Dallas,437
Atlanta,416
Miami,374
Sacramento,341
Los Angeles,331
Kansas City,327


#### Most Cites and there states with Number of Calls

In [68]:
select top 10 cs.city, cs.[state]
from CallCenter cs 
group by cs.city, cs.[state]
order by  count(cs.call_center)

city,state
Eugene,Oregon
Yonkers,New York
Kingsport,Tennessee
Ridgely,Maryland
Migrate,Kentucky
Myrtle Beach,South Carolina
Mesquite,Texas
Homestead,Florida
Edmond,Oklahoma
Englewood,Colorado


#### Max Call Center With Customers Scores

In [77]:
select cs.call_center ,avg(cs.csat_score)
from CallCenter cs
group by call_center, csat_score

order by 1

call_center,(No column name)
Baltimore/MD,2
Baltimore/MD,8
Baltimore/MD,5
Baltimore/MD,11
Baltimore/MD,10
Baltimore/MD,4
Baltimore/MD,7
Baltimore/MD,1
Baltimore/MD,9
Baltimore/MD,6


#### Number of Customers Rate the Call Center with Max Score

In [9]:
WITH RatedCallCenters AS (
    SELECT
        call_center,
        csat_score,
        COUNT(id) AS NumberOfCustomers
    FROM
        CallCenter cs -- Replace with your actual table name
   where cs.csat_score <> 11
    GROUP BY
        call_center, csat_score
    
)
SELECT
    call_center AS Name,
    csat_score AS Rate,
    NumberOfCustomers
FROM
    RatedCallCenters
--WHERE
  --  RowNum = 1
ORDER BY
    NumberOfCustomers DESC
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;


Name,Rate,NumberOfCustomers
Los Angeles/CA,5,792
Los Angeles/CA,6,769
Los Angeles/CA,3,663
Baltimore/MD,5,626
Baltimore/MD,6,619


#### Each Call Center With Average Scores

In [1]:
select cs.call_center, avg(cs.csat_score) as 'Average of Scores'
from CallCenter cs  
where cs.csat_score <> 11
group by cs.call_center

call_center,Average of Scores
Baltimore/MD,5
Chicago/IL,5
Denver/CO,5
Los Angeles/CA,5


In [16]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) FROM CallCenter
select cs.sentiment, count(*), round((COUNT(*) / @rows )* 100, 1) AS 'Persent%'
FROM CallCenter cs
GROUP BY sentiment
order by 2


sentiment,(No column name),Persent%
Very Positive,3170,9.6
Positive,3928,11.9
Very Negative,6026,18.3
Neutral,8754,26.6
Negative,11063,33.6


#### Percent of Calls' Reasons  

In [18]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) FROM CallCenter
select cs.reason, count(*) as 'Number of calls', round((COUNT(*) / @rows )* 100, 1) AS 'Persent%'
FROM CallCenter cs
GROUP BY reason
order by 2


reason,Number of calls,Persent%
Service Outage,4730,14.4
Payments,4749,14.4
Billing Question,23462,71.2


#### Channels with Number of Calls and it's Percent

In [19]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) FROM CallCenter
select cs.channel, count(*) as 'Number of calls', round((COUNT(*) / @rows )* 100, 1) AS 'Persent%'
FROM CallCenter cs
GROUP BY cs.channel
order by 2

channel,Number of calls,Persent%
Web,6576,20.0
Email,7470,22.7
Chatbot,8256,25.1
Call-Center,10639,32.3


#### Score and Number of Calls and it's Percent

In [27]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) 
FROM CallCenter cs 
where cs.csat_score <> 11


select cs.csat_score, count(*) as 'Number of calls', round((COUNT(*) / @rows )* 100, 1) AS 'Persent%'
FROM CallCenter cs
where cs.csat_score <> 11
GROUP BY cs.csat_score
order by 2 desc

csat_score,Number of calls,Persent%
5,1899,15.5
6,1865,15.2
3,1575,12.8
4,1526,12.4
7,1314,10.7
8,1266,10.3
9,1092,8.9
1,595,4.8
2,571,4.7
10,568,4.6


#### Call Center and Number of Calls and it's Percent

In [28]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) FROM CallCenter
select cs.call_center, count(*) as 'Number of calls', round((COUNT(*) / @rows )* 100, 1) AS 'Persent%'
FROM CallCenter cs
GROUP BY cs.call_center
order by 2


call_center,Number of calls,Persent%
Denver/CO,2776,8.4
Chicago/IL,5419,16.5
Baltimore/MD,11012,33.4
Los Angeles/CA,13734,41.7


#### Days Of Week and Number of Calls and it's Percent

In [32]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) FROM CallCenter
select format(cs.call_timestamp, 'dddd') as 'Day Name', count(*) as 'Number of calls', round((COUNT(*) / @rows )* 100, 1) AS 'Persent%'
FROM CallCenter cs
GROUP BY format(cs.call_timestamp, 'dddd')
order by 2


Day Name,Number of calls,Persent%
Sunday,4296,13.0
Monday,4334,13.2
Saturday,4403,13.4
Tuesday,4408,13.4
Wednesday,4449,13.5
Thursday,5481,16.6
Friday,5570,16.9


In [36]:
DECLARE @rows FLOAT 
select @rows = COUNT(*) FROM CallCenter
select day(cs.call_timestamp) as 'Day Name', count(*) as 'Number of calls', round((COUNT(*) / @rows )* 100, 4) AS 'Persent%'
FROM CallCenter cs
GROUP BY day(cs.call_timestamp)
order by 2

Day Name,Number of calls,Persent%
31,1,0.003
7,1045,3.1723
4,1049,3.1845
29,1053,3.1966
26,1054,3.1997
27,1062,3.2239
25,1063,3.227
8,1067,3.2391
20,1077,3.2695
2,1084,3.2907


#### Quarters and Number of Calls and it's Percent

In [42]:

with DateQuartiles as(
    select cs.call_timestamp, NTILE(4) over(order by cs.call_timestamp) as Quartile
    from CallCenter cs
)

SELECT Quartile, count(*) as 'Number of calls', round((COUNT(*) / cast((select COUNT(*) FROM CallCenter) as float))* 100, 2) AS 'Persent%'
from DateQuartiles
GROUP by Quartile

Quartile,Number of calls,Persent%
1,8236,25
2,8235,25
3,8235,25
4,8235,25


#### Min Score, Max Score, and Average of Scores

In [44]:
select min(cs.csat_score) as 'Min Score', max(cs.csat_score) as 'Max Score', avg(cs.csat_score) as 'Average Score'
from CallCenter cs 
where cs.csat_score <> 11

Min Score,Max Score,Average Score
1,10,5


#### Response Time  for each Call Center and Number of Calls

In [48]:
SELECT call_center, response_time, COUNT(*) AS 'Number of Calls'
FROM CallCenter
GROUP BY call_center, response_time
ORDER BY 1,3 DESC;

call_center,response_time,Number of Calls
Baltimore/MD,Within SLA,6855
Baltimore/MD,Below SLA,2768
Baltimore/MD,Above SLA,1389
Chicago/IL,Within SLA,3361
Chicago/IL,Below SLA,1361
Chicago/IL,Above SLA,697
Denver/CO,Within SLA,1741
Denver/CO,Below SLA,692
Denver/CO,Above SLA,343
Los Angeles/CA,Within SLA,8668


#### Average of Call Duration for Each Sentiment

In [51]:
SELECT sentiment, AVG(cs.call_duration_in_minutes) as 'Average of call duration in minutes' 
FROM CallCenter cs 
GROUP BY sentiment
ORDER BY 2 DESC;


sentiment,Average of call duration in minutes
Negative,25
Neutral,24
Very Positive,24
Positive,24
Very Negative,24


#### Average of Score for Each State

In [52]:
SELECT state, AVG(csat_score) as avg_csat_score 
FROM CallCenter cs 
WHERE csat_score != 0 and cs.csat_score <> 11
GROUP BY state 
ORDER BY 2 DESC;


state,avg_csat_score
Wyoming,6
North Dakota,6
Vermont,6
Idaho,5
Georgia,5
Connecticut,5
New Jersey,5
Michigan,5
Iowa,5
North Carolina,5


#### Number of Calls for Each State and Sentiment 

In [54]:
SELECT state, sentiment , COUNT(*) as 'Number of Calls'
FROM CallCenter GROUP BY state, sentiment
ORDER BY 1,3 DESC;


state,sentiment,Number of Calls
Alabama,Negative,265
Alabama,Neutral,200
Alabama,Very Negative,129
Alabama,Positive,74
Alabama,Very Positive,70
Alaska,Negative,45
Alaska,Neutral,32
Alaska,Very Negative,28
Alaska,Positive,25
Alaska,Very Positive,16


#### Number of Calls for each Reason and Reason 

In [55]:
SELECT state, reason, COUNT(*) as 'Number of Calls' FROM CallCenter cs 
GROUP BY state, reason 
ORDER BY 1,2,3 DESC;

state,reason,Number of Calls
Alabama,Billing Question,538
Alabama,Payments,105
Alabama,Service Outage,95
Alaska,Billing Question,93
Alaska,Payments,25
Alaska,Service Outage,28
Arizona,Billing Question,507
Arizona,Payments,119
Arizona,Service Outage,111
Arkansas,Billing Question,152


#### Number of Calls for Each State

In [58]:
SELECT state, COUNT(*) as 'Number of Calls'
FROM CallCenter 
GROUP BY state 
ORDER BY 2 DESC;


state,Number of Calls
California,3631
Texas,3572
Florida,2834
New York,1786
Virginia,1164
Ohio,1160
District of Columbia,1110
Pennsylvania,1017
Georgia,926
Illinois,848
