In [None]:
!pip install pandas
!pip install pyspark
!pip install pyarrow

In [None]:
from pyspark.sql import SparkSession
 
# Create (or reuse) a SparkSession whose application
# name is "pandas to spark"
session_builder = SparkSession.builder.appName("pandas to spark")
spark = session_builder.getOrCreate()

# Switch on the Arrow setting for PySpark on the session's runtime config
arrow_setting = "spark.sql.execution.arrow.pyspark.enabled"
spark.conf.set(arrow_setting, "true")

In [2]:
import pandas as pd

# Sample rows, one per policy: [pid, tiv_2015, tiv_2016, lat, lon]
data = [
    [1, 10, 5, 10, 10],
    [2, 20, 20, 20, 20],
    [3, 10, 30, 20, 20],
    [4, 10, 40, 40, 40],
]

# Column name -> pandas nullable dtype; the key order is the column order
column_dtypes = {
    'pid': 'Int64',
    'tiv_2015': 'Float64',
    'tiv_2016': 'Float64',
    'lat': 'Float64',
    'lon': 'Float64',
}

insurance = pd.DataFrame(data, columns=list(column_dtypes)).astype(column_dtypes)

In [3]:
# Convert the pandas frame into a Spark DataFrame. The name `insurance` is
# rebound from the pandas object to the Spark one, so convert only while it is
# still a pandas frame: this keeps the cell safe to re-run, because
# createDataFrame does not accept an existing Spark DataFrame as input.
if isinstance(insurance, pd.DataFrame):
    insurance = spark.createDataFrame(insurance)
insurance.show()

+---+--------+--------+----+----+
|pid|tiv_2015|tiv_2016| lat| lon|
+---+--------+--------+----+----+
|  1|    10.0|     5.0|10.0|10.0|
|  2|    20.0|    20.0|20.0|20.0|
|  3|    10.0|    30.0|20.0|20.0|
|  4|    10.0|    40.0|40.0|40.0|
+---+--------+--------+----+----+



In [4]:
from pyspark.sql.functions import count, col, sum, round

# tiv_2015 values that are shared by more than one policy
investment = (
    insurance
    .groupby('tiv_2015')
    .agg(count('pid').alias('count'))
    .where('count > 1')
)

# (lat, lon) pairs that belong to exactly one policy
location = (
    insurance
    .groupby(['lat', 'lon'])
    .agg(count('pid').alias('count'))
    .where('count == 1')
)

# Keep the policies whose tiv_2015 is shared AND whose (lat, lon) pair is
# unique, then total their tiv_2016.
# The location filter is a semi join on both columns together: testing lat
# and lon with two independent isin() filters would also accept a row whose
# lat comes from one unique location and whose lon comes from another.
# Semi joins also keep the filtering inside Spark instead of collecting the
# key lists to the driver with toPandas().
report = (
    insurance
    .join(investment.select('tiv_2015'), on='tiv_2015', how='left_semi')
    .join(location.select('lat', 'lon'), on=['lat', 'lon'], how='left_semi')
    .agg(round(sum('tiv_2016'), 2).alias('tiv_2016'))
)

report.show()

                                                                                

+--------+
|tiv_2016|
+--------+
|    45.0|
+--------+



In [5]:
# Shunting Yard Algorithm

infix_expression = '(3 + 4) - (2 + 5) - (1 - 5)'

# Binary operators and their precedence (a higher number binds tighter)
precedences = {'+' : 0, '-' : 0, '*' : 1, '/' : 1}


def infix_to_postfix(expression):
    """Convert an infix arithmetic expression to a space-separated postfix string.

    Supports non-negative integer operands (one or more ASCII digits), the
    left-associative binary operators in `precedences`, parentheses and
    whitespace between tokens.

    Args:
        expression: The infix expression as a string.

    Returns:
        The postfix (reverse Polish) form with tokens joined by single spaces.

    Raises:
        ValueError: If the expression contains an unsupported character or
            its parentheses are not balanced.
    """
    output, operators = [], []  # operators is a stack; its top is the list end
    number = ''                 # digits of the operand currently being read

    for char in expression:
        if '0' <= char <= '9':
            # Accumulate digits so multi-digit operands stay one token.
            number += char
            continue

        # Any non-digit character ends the operand being read.
        if number:
            output.append(number)
            number = ''

        if char.isspace():
            continue
        if char in precedences:
            # Pop operators of higher or equal precedence first
            # (equal precedence pops because the operators are left-associative).
            while (operators and operators[-1] != '('
                   and precedences[operators[-1]] >= precedences[char]):
                output.append(operators.pop())
            operators.append(char)
        elif char == '(':
            operators.append(char)
        elif char == ')':
            while operators and operators[-1] != '(':
                output.append(operators.pop())
            if not operators:
                raise ValueError('mismatched parentheses')
            operators.pop()  # discard the matching '('
        else:
            raise ValueError(f'illegal character: {char!r}')

    if number:
        output.append(number)

    # Flush the remaining operators; a leftover '(' was never closed.
    while operators:
        top = operators.pop()
        if top == '(':
            raise ValueError('mismatched parentheses')
        output.append(top)

    return ' '.join(output)


postfix_expression = infix_to_postfix(infix_expression)

print(postfix_expression)


3 4 + 2 5 + - 1 5 - -


In [6]:
# Calculation
def operate(value_1, operator, value_2):
    """Apply a binary arithmetic operator to two operands.

    Args:
        value_1: Left operand.
        operator: One of '+', '-', '/' or '*'.
        value_2: Right operand.

    Returns:
        The result of `value_1 <operator> value_2`; '/' is true division.

    Raises:
        ValueError: If `operator` is not one of the supported symbols
            (instead of silently returning None).
    """
    if operator == '+':
        return value_1 + value_2
    if operator == '-':
        return value_1 - value_2
    if operator == '/':
        return value_1 / value_2
    if operator == '*':
        return value_1 * value_2
    raise ValueError(f'unsupported operator: {operator!r}')
    
# Evaluate the postfix expression with an operand stack.
output = []

# Walk whitespace-separated tokens rather than single characters, so that
# multi-digit operands are read as one number.
for value in postfix_expression.split():
    if value.isdecimal():
        output.append(int(value))
    elif value in precedences:
        # The right operand was pushed last, so it is popped first.
        item_1 = output.pop()
        item_2 = output.pop()
        output.append(operate(item_2, value, item_1))

print(output[0])



4


In [7]:
# Binary Tree

class Node:
    """A binary-tree node holding a payload and optional left/right children."""

    def __init__(self, d):
        """Store `d` as the node's data; both children start out as None."""
        self.data = d
        self.right = self.left = None

# Build the sample tree:
#         1
#        / \
#       2   3
#      /
#     4
firstNode, secondNode, thirdNode, fourthNode = (Node(label) for label in (1, 2, 3, 4))

firstNode.left, firstNode.right = secondNode, thirdNode
secondNode.left = fourthNode

def pre_order_dfs(node):
    """Print the subtree under `node` in pre-order: node, left, then right.

    Each node's data is printed with no separator; a None node prints nothing.
    """
    if node is not None:
        print(node.data, end='')
        pre_order_dfs(node.left)
        pre_order_dfs(node.right)

def post_order_dfs(node):
    """Print the subtree under `node` in post-order: left, right, then node.

    Each node's data is printed with no separator; a None node prints nothing.
    """
    if node is not None:
        post_order_dfs(node.left)
        post_order_dfs(node.right)
        print(node.data, end='')

def in_order_dfs(node):
    """Print the subtree under `node` in in-order: left, node, then right.

    Each node's data is printed with no separator; a None node prints nothing.
    """
    if node is not None:
        in_order_dfs(node.left)
        print(node.data, end='')
        in_order_dfs(node.right)

# Print the pre-order, post-order and in-order traversals of the sample
# tree, emitting a newline string between consecutive traversals.
for index, traverse in enumerate((pre_order_dfs, post_order_dfs, in_order_dfs)):
    if index > 0:
        print('\n')
    traverse(firstNode)


1243

4231

4213

In [8]:
# Parse Tree

# Build a parse tree from a fully parenthesized infix expression whose
# tokens are separated by whitespace.
infix_expression = '( ( 2 + 8 ) - ( 7 - 8 ) )'
currentNode = Node(None)
# Stack of nodes to return to; the top of the stack is the end of the list.
parentStack = [currentNode]

for value in infix_expression.split():
    if value == '(':
        # Start a sub-expression: descend into a new left child.
        currentNode.left = Node(None)
        parentStack.append(currentNode)
        currentNode = currentNode.left
    elif value in ["+", "-", "/", "*"]:
        # The operator labels the current node; its right operand comes next.
        currentNode.data = value
        currentNode.right = Node(None)
        parentStack.append(currentNode)
        currentNode = currentNode.right
    elif value.isdecimal():
        # Operand (may have several digits): store it and go back up.
        currentNode.data = value
        currentNode = parentStack.pop()
    elif value == ')':
        # The sub-expression is complete: go back up.
        currentNode = parentStack.pop()
    else:
        raise ValueError('illegal argument')

in_order_dfs(currentNode)

2+8-7-8

In [9]:
# Evaluation

def evaluate(node: Node):
    """Recursively evaluate the expression tree rooted at `node`.

    A node with both a left and a right child is treated as an operator node:
    its data is the operator applied, via `operate`, to the values of its two
    subtrees. Any other node is treated as an operand and its data is
    converted with `int`.

    Args:
        node: Root of the (sub)tree to evaluate.

    Returns:
        The value of the expression represented by the tree.
    """
    # Compare with None by identity rather than by equality.
    if node.left is not None and node.right is not None:
        left_operand = evaluate(node.left)
        right_operand = evaluate(node.right)
        return operate(left_operand, node.data, right_operand)
    return int(node.data)

# Evaluate the tree referenced by currentNode; as the last expression in the
# cell, its value is shown as the cell output.
evaluate(currentNode)

11