## SQL Connection logic

In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os

ModuleNotFoundError: No module named 'sqlalchemy'

In [9]:
%load_ext sql
%sql postgresql://toofanmacpro@localhost:5432/testdb


The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [10]:
%%sql
SELECT * FROM "Employee" LIMIT 5;

 * postgresql://toofanmacpro@localhost:5432/testdb
3 rows affected.


id,salary
1,100
2,200
3,300


In [11]:

def toDB(da, tableName):
    try:
        engine = create_engine('postgresql://toofanmacpro@localhost:5432/testdb')
        try: 
            da.to_sql(tableName, engine, if_exists= 'replace')
        except Exception as e:
            print(f"error occurred: {e}")
            
    except Exception as e:
        print("error occured at create_engine: {e}")

In [12]:
id = [1,2,3]
salary = [100,200,300]

df = pd.DataFrame({'id':id, 'salary': salary})

In [13]:
toDB(df,'Employee')

## Find second highest salary

-- if not found, return null

### SQL Solution - mine

_first lets bring out unique salary values and sort them_

In [22]:
%%sql 
SELECT 
    DISTINCT salary 
FROM 
    "Employee"
ORDER BY salary;

 * postgresql://toofanmacpro@localhost:5432/testdb
3 rows affected.


salary
100
200
300


Combination of LIMIT and OFFSET can be used to get desired output.

OFFSET skips the specified number of rows » so, we skip the top row  
LIMIT can be used to select » once second best becomes top, we limit result to 1

In [None]:
%%sql
SELECT 
    DISTINCT salary AS SecondHighestSalary
FROM 
    "Employee"
ORDER BY salary
LIMIT 1 OFFSET 1;

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


secondhighestsalary
200


_now lets deal with the null if not found issue_

In [26]:
%%sql
SELECT(
    SELECT 
        DISTINCT salary
    FROM 
        "Employee"
    ORDER BY salary
    LIMIT 1 OFFSET 1
) AS SecondHighestSalary;

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


secondhighestsalary
200


### My pandas solution

In [59]:
import numpy as np

salaries = (df
            .salary
            .sort_values(ascending = False)
            .unique()
            )

secondHigh = int(salaries[1])
da = pd.DataFrame({"secondHighestSalary":secondHigh}, index = [0])

da

Unnamed: 0,secondHighestSalary
0,200


### Pandas - Best solution:

In [72]:
unique = (df
          .salary
          .drop_duplicates()
          .nlargest(2)
          )

if (len(unique) < 2):
    print(pd.DataFrame({"SecondHighestSalary": [None]}, index = [0]))
else:
    print(pd.DataFrame({"SecondHighestSalary":[unique.iloc[1]]}, index = [0]))

   SecondHighestSalary
0                  200


## Nth Largest

-- use cases include 0 and -1

### My Pandas solution

In [113]:
def nLargest(daa: pd.DataFrame, N: int) -> pd.DataFrame: # '->' indicates what the output should be, and ':' defines what the inputs should be
    if N <= 0 : 
        return (pd.DataFrame({"SecondHighestSalary": [None]}, index = [0]))

    unique = (daa
          .sort_values(by = 'salary', ascending=False)
          .salary
          .drop_duplicates()
          .nlargest(N)
          )

    print(unique)
    
    if (len(unique) < N):
        return (pd.DataFrame({"SecondHighestSalary": [None]}, index = [0]))
    else:
        return (pd.DataFrame({"SecondHighestSalary":[unique.iloc[-1]]}, index = [0]))

In [114]:
nLargest(df,2)

2    300
1    200
Name: salary, dtype: int64


Unnamed: 0,SecondHighestSalary
0,200


In [115]:
nLargest(df,0)

Unnamed: 0,SecondHighestSalary
0,


In [116]:
nLargest(df,-10)

Unnamed: 0,SecondHighestSalary
0,


###  MY SQL Query of NthLargest

In [108]:
%%sql
SELECT 
    DISTINCT salary
FROM "Employee"
ORDER BY salary DESC
LIMIT 1 OFFSET 1;

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


salary
200


In [141]:
%%sql
CREATE OR REPLACE FUNCTION NthLargest(N INT) RETURNS TABLE (salary BIGINT) AS $$
BEGIN
RETURN QUERY (
    SELECT
        CASE WHEN N <= 0 THEN NULL
        ELSE 
            (
                SELECT 
                    DISTINCT e.salary
                FROM "Employee" AS e
                ORDER BY e.salary DESC
                LIMIT 1 OFFSET (N-1)
            )
        END AS salary
    );
END;
$$ LANGUAGE plpgsql;

 * postgresql://toofanmacpro@localhost:5432/testdb
Done.


[]

In [142]:
%%sql
SELECT * FROM NthLargest(1)

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


salary
300


IF a funciton has to be dropped

In [None]:
# %%sql
# DROP FUNCTION nthlargest(integer)

 * postgresql://toofanmacpro@localhost:5432/testdb
Done.


[]

In [143]:
%%sql
SELECT * FROM NthLargest(-1)

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


salary
""


### Nth Highest = SQL - best solution

In [161]:
%%sql
CREATE OR REPLACE FUNCTION NthBigSalary(N INT) RETURNS TABLE(salary BIGINT) AS $$
BEGIN
    IF N < 1 THEN 
        RETURN QUERY (SELECT NULL::BIGINT AS salary);
    ELSE
        RETURN QUERY(
            SELECT 
                e.salary
            FROM "Employee" AS e
            GROUP BY e.salary
            ORDER BY e.salary DESC
            OFFSET N-1 LIMIT 1
        );
    END IF;
END;
$$ LANGUAGE plpgsql;

 * postgresql://toofanmacpro@localhost:5432/testdb
Done.


[]

we used BIGINT because thats how our data is stored

In [162]:
%%sql
SELECT * FROM NthBigSalary(2)

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


salary
200


In [163]:
%%sql
SELECT * FROM NthBigSalary(-1)

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


salary
""


In [164]:
%%sql
SELECT * FROM NthBigSalary(0)

 * postgresql://toofanmacpro@localhost:5432/testdb
1 rows affected.


salary
""


## DENSE_RANK() -- different implementations

In [167]:
Scores = pd.DataFrame({'id':range(1,7), 'score':[3.50,3.65,4.00,3.85,4.00,3.65]})

Scores

Unnamed: 0,id,score
0,1,3.5
1,2,3.65
2,3,4.0
3,4,3.85
4,5,4.0
5,6,3.65


In [168]:
toDB(Scores,'Scores')

In [170]:
%%sql
SELECT * FROM "Scores";

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


index,id,score
0,1,3.5
1,2,3.65
2,3,4.0
3,4,3.85
4,5,4.0
5,6,3.65


In [227]:
%%sql
SELECT s1.score, 
       (SELECT COUNT(DISTINCT s2.score) 
        FROM "Scores" s2 
        WHERE s2.score >= s1.score) AS rank
FROM "Scores" s1
ORDER BY s1.score DESC;

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


score,rank
4.0,1
4.0,1
3.85,2
3.65,3
3.65,3
3.5,4


Explanation: 

Things to keep in mind: SQL's logical query processing order or execution order

1. FROM and JOIN
2. WHERE
3. GROUP BY
4. HAVING
5. SELECT
6. DISTINCT
7. ORDER BY
8. LIMIT/OFFSET (TOP, FETCH FIRST in SQL Server)

So,   
first: SELECT would choose each score1 from table 1  

second: it will do multiple things in this order:  

» filter the new table to have only values greater than or equal to score1   
- _(ex: >= 4 --> 4,4 = 2 rows) ( >= 3.85 --> 4,4,3.85 = 3 rows) ( >= 3.65 --> 4,4,3.85,3.65,3.65 = 5 rows)_  

» then COUNT the number of distinct scores in that table   
- _(Distinct count = 1 -- 4's repeptitions) (distinct count = 2 -- 4's repeptitions) (distinct count = 3 -- 4's and 3.65's repetetions)_  

» use that as rank   
- _(4 is ranked 1) (3.85 is ranked 2) (3.65's are ranked 3)_  

Scores.order_values(['score'])

In [188]:
Scores.sort_values(by = ['score'], inplace = True, ascending= False)

In [194]:
Scores.assign(
    rank = Scores.score.rank(method = 'dense', ascending = False)
)

Unnamed: 0,id,score,rank
2,3,4.0,1.0
4,5,4.0,1.0
3,4,3.85,2.0
1,2,3.65,3.0
5,6,3.65,3.0
0,1,3.5,4.0


### Dense Rank in python

In [208]:
def denseRnk(s:pd.DataFrame) -> pd.DataFrame:
   s = s.assign(
            rank = s.score.rank(
                method = 'dense',
                ascending = False
            )
        )
   return s[['score', 'rank']].sort_values(by=['rank'])

In [209]:
denseRnk(Scores)

Unnamed: 0,score,rank
2,4.0,1.0
4,4.0,1.0
3,3.85,2.0
1,3.65,3.0
5,3.65,3.0
0,3.5,4.0


In [225]:
%%sql
SELECT 
    s1.score,
    COUNT(DISTINCT s2.score) AS rank
FROM "Scores" s1
LEFT JOIN "Scores" s2 
ON s1.score <= s2.score
GROUP BY s1.score
ORDER BY s1.score DESC;

 * postgresql://toofanmacpro@localhost:5432/testdb
4 rows affected.


score,rank
4.0,1
3.85,2
3.65,3
3.5,4


In [234]:
%%sql
SELECT
    s1.index,
    s1.score,
    COUNT(DISTINCT s2.score) AS rank
FROM "Scores" s1
LEFT JOIN "Scores" s2
    ON s1.score <= s2.score
GROUP BY s1.index, s1.score
ORDER BY s1.score DESC;

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


index,score,rank
2,4.0,1
4,4.0,1
3,3.85,2
1,3.65,3
5,3.65,3
0,3.5,4


In [229]:
%%sql
SELECT * FROM "Scores";

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


index,id,score
0,1,3.5
1,2,3.65
2,3,4.0
3,4,3.85
4,5,4.0
5,6,3.65


#### Dense ranking with Joins

In [242]:
%%sql
SELECT s1.score, COUNT(DISTINCT s2.score) AS rank
FROM "Scores" s1
LEFT JOIN "Scores" s2 ON s1.score <= s2.score
GROUP BY s1.id, s1.score
ORDER BY s1.score DESC;

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


score,rank
4.0,1
4.0,1
3.85,2
3.65,3
3.65,3
3.5,4


#### Sparse ranking with Joins

In [239]:
%%sql
SELECT s1.score, COUNT(s2.score)+1 AS rank
FROM "Scores" s1
LEFT JOIN "Scores" s2 ON s1.score < s2.score
GROUP BY s1.id, s1.score
ORDER BY s1.score DESC;

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


score,rank
4.0,1
4.0,1
3.85,3
3.65,4
3.65,4
3.5,6


In [262]:
%%sql
SELECT 
    s1.score,
    (
        SELECT
            COUNT(s2.score) + 1 AS rank
        FROM "Scores" AS s2
        WHERE s1.score < s2.score
    )
FROM "Scores" AS s1
GROUP BY s1.id, s1.score
ORDER BY s1.score DESC;

 * postgresql://toofanmacpro@localhost:5432/testdb
6 rows affected.


score,rank
4.0,1
4.0,1
3.85,3
3.65,4
3.65,4
3.5,6


## Largest Number - python

In [None]:
from typing import List
import itertools

class Solution:

    def largestNumber(self, nums: List[int]) -> str:
        num_strings = [str(num) for num in nums]
        
        num_strings.sort(key = lambda a: a*10, reverse = True)                    
  

In [327]:
a = [10,2]

In [329]:
strA = [str(num) for num in a]

strA

['10', '2']

In [None]:
strA.sort(key=lambda a: a*10, reverse = True)

strA

['2', '10']

In [326]:
permN

<itertools.permutations at 0x1233093a0>