## 1. SQL example
   - Create a data frame 'my_data' with columns: "employee_name", "department", "salary"
   - Use it to create a sqlite3 table 'my_data' under 'chinook.db' which can be download from 
     -  https://www.sqlitetutorial.net/sqlite-sample-database/                                        
   - Rank the salary by using window function rank() 
   - Rank the salary by using window function dense_rank()                                           
   - Apply row_column() to get the employee_name with the highest salary
                                     

In [1]:
import sqlite3
import pandas as pd

### Establish connection

In [2]:
path = 'C:\\Users\\sophia.yue\\Infosys\Data\\'
conn = sqlite3.connect(path + 'chinook.db')
# create a lambda function to execute sql 
exec_sql = lambda title, sql_cmd: print(f'{title} \n {pd.read_sql_query(sql_cmd, conn)}') 

# Get table attributes 
def tbl_attr(tbl_nam):
    tbl_attr = f'PRAGMA table_info({tbl_nam})'
    exec_sql (f"Attritube of {tbl_nam}", tbl_attr)

### Create a table from a data frame

In [3]:
simp_data = (("James", "Sales", 3000), \
    ("Michael", "Sales", 4600),  \
    ("Robert", "Sales", 4100),   \
    ("Maria", "Finance", 3000),  \
    ("James", "Sales", 3000),    \
    ("Scott", "Finance", 3300),  \
    ("Jen", "Finance", 3900),    \
    ("Jeff", "Marketing", 3000), \
    ("Kumar", "Marketing", 2000),\
    ("Saif", "Sales", 4100) \
  )
 
columns= ["employee_name", "department", "salary"]
my_data = pd.DataFrame(data = simp_data, columns = columns)
my_data

Unnamed: 0,employee_name,department,salary
0,James,Sales,3000
1,Michael,Sales,4600
2,Robert,Sales,4100
3,Maria,Finance,3000
4,James,Sales,3000
5,Scott,Finance,3300
6,Jen,Finance,3900
7,Jeff,Marketing,3000
8,Kumar,Marketing,2000
9,Saif,Sales,4100


In [4]:
my_data.to_sql("my_data", conn, if_exists="replace", index = False)
tbl_attr('my_data')
sql = """select * from my_data order by salary desc
"""
exec_sql (f"employees top 10", sql)

Attritube of my_data 
    cid           name     type  notnull dflt_value  pk
0    0  employee_name     TEXT        0       None   0
1    1     department     TEXT        0       None   0
2    2         salary  INTEGER        0       None   0
employees top 10 
   employee_name department  salary
0       Michael      Sales    4600
1        Robert      Sales    4100
2          Saif      Sales    4100
3           Jen    Finance    3900
4         Scott    Finance    3300
5         James      Sales    3000
6         Maria    Finance    3000
7         James      Sales    3000
8          Jeff  Marketing    3000
9         Kumar  Marketing    2000


### 1.1 Rank the salary by using window function rank() 


In [5]:
sql = """
select *, rank() over (order by salary desc) salary_rank from my_data
"""
exec_sql (f"rank", sql)

rank 
   employee_name department  salary  salary_rank
0       Michael      Sales    4600            1
1        Robert      Sales    4100            2
2          Saif      Sales    4100            2
3           Jen    Finance    3900            4
4         Scott    Finance    3300            5
5         James      Sales    3000            6
6         Maria    Finance    3000            6
7         James      Sales    3000            6
8          Jeff  Marketing    3000            6
9         Kumar  Marketing    2000           10


### 1.2 Rank the salary by using window function dense_rank()                                           


In [6]:
sql = """
select *, dense_rank() over (order by salary desc) salary_rank from my_data
"""
exec_sql (f"dense_rank", sql)

dense_rank 
   employee_name department  salary  salary_rank
0       Michael      Sales    4600            1
1        Robert      Sales    4100            2
2          Saif      Sales    4100            2
3           Jen    Finance    3900            3
4         Scott    Finance    3300            4
5         James      Sales    3000            5
6         Maria    Finance    3000            5
7         James      Sales    3000            5
8          Jeff  Marketing    3000            5
9         Kumar  Marketing    2000            6


### 1.3 Apply row_column() to get the employee_name with the highest salary

In [7]:
sql= """
select * from 
(select *, row_number() over (partition  by department order by salary desc) as seqnum
          from my_data) tmp
where seqnum = 1 
"""
exec_sql (f"department top salary", sql)

department top salary 
   employee_name department  salary  seqnum
0           Jen    Finance    3900       1
1          Jeff  Marketing    3000       1
2       Michael      Sales    4600       1


## 2. Get counts of a string 
   - Write a function get counts of each character from a string, e.g.,
   - Write a function get counts of consecutive characters from a string

### 2.1 Write a function get count of each character from a string

In [8]:
 def cnt_char(str):
     """
      Purpose: Get count of consecutive characters from a string
     """   
     l_str = list(char for char in str) # convert  a str to a list of char
     # get count of char
     l_str_cnt = list(set((k,l_str.count(k)) for k in l_str))
     print("Given string: ", str)
     print(f'Count of character in a given string: {l_str_cnt}')    
   

In [9]:
cnt_char(str = "XXXXYYYZZWX") 
cnt_char(str = "XXXXYYYZZ") 
cnt_char(str = "XXXXYYYZZX")
cnt_char(str = "")

Given string:  XXXXYYYZZWX
Count of character in a given string: [('X', 5), ('W', 1), ('Y', 3), ('Z', 2)]
Given string:  XXXXYYYZZ
Count of character in a given string: [('X', 4), ('Z', 2), ('Y', 3)]
Given string:  XXXXYYYZZX
Count of character in a given string: [('X', 5), ('Y', 3), ('Z', 2)]
Given string:  
Count of character in a given string: []


### 2.1 Write a function get count of consecutive characters from a string

In [10]:
def cnt_cons_char(str):
    """
     Purpose: Get count of consecutive characters from a string
    """
    str = re.sub(' +', '', str)  
    l_cons_str = [] 
    k = 0
    for i in range(0, len(str)):  
        count = 1;      
        if k == len(str):
           break 
            
        for j in range(k+1, len(str)):  
            if(str[k] == str[j] ):  
                count = count + 1;                
            else:
                if count == 1:
                   k = j 
                   break
                else: 
                   l_cons_str.append((str[k], count))         
                   k = j 
                   break
    print("Given string: ", str)
    print(f'Count of duplicate characters in a given string: {l_cons_str}') 

In [11]:
import re
cnt_cons_char(str = "XXXXYYYZZWX") 
cnt_cons_char(str = "XXXXYYYZZX")
cnt_cons_char(str = "XXXXYYYWZZX")

Given string:  XXXXYYYZZWX
Count of duplicate characters in a given string: [('X', 4), ('Y', 3), ('Z', 2)]
Given string:  XXXXYYYZZX
Count of duplicate characters in a given string: [('X', 4), ('Y', 3), ('Z', 2)]
Given string:  XXXXYYYWZZX
Count of duplicate characters in a given string: [('X', 4), ('Y', 3), ('Z', 2)]
