In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import HiveContext
from pyspark.sql import SQLContext, Row

In [0]:
# Create (or reuse) the Spark session for this notebook.
# NOTE: despite its name, `sparkContext` holds a SparkSession, not a
# SparkContext. The name is kept because other cells may reference it.
# NOTE(review): the session is built without enableHiveSupport(); confirm the
# cluster's default catalog supports the Hive-style DDL issued further down.
sparkContext = (
    SparkSession.builder
    .appName("Apartment POC")
    .getOrCreate()
)

# HiveContext expects a SparkContext, so hand it the one owned by the session
# instead of the session object itself.
hivecontext = HiveContext(sparkContext.sparkContext)

# Configure the Hive context: concurrency, bucketing, dynamic partitioning
# and compaction settings.
hivecontext.setConf('hive.support.concurrency', 'true')
hivecontext.setConf('hive.enforce.bucketing', 'true')
hivecontext.setConf('hive.exec.dynamic.partition', 'true')
hivecontext.setConf('hive.exec.dynamic.partition.mode', 'nostrict')
hivecontext.setConf('hive.compactor.initiator.on', 'true')
hivecontext.setConf('hive.compactor.worker.threads', '1')

# SQLContext likewise takes a SparkContext as its first argument.
sqlContext = SQLContext(sparkContext.sparkContext)

In [0]:
# Load every row of the newData table.
df = hivecontext.sql("select  * from newData")

# Keep the distinct-country DataFrame itself: DataFrame.show() only prints and
# returns None, so chaining it into the assignment would leave
# `distinctCountry` bound to None.
distinctCountry = df.select(df["country"]).distinct()
distinctCountry.show()

In [0]:
# Landlord Delta table (created only when it does not exist yet).
# The column list is split one column per literal; adjacent literals are
# concatenated by Python, so the statement text sent to Spark is unchanged.
# NOTE(review): Password is a plain STRING column - confirm that only hashed
# values are ever written to it.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS landlord_delta ("
    "Landlord_id INT,  "
    "Password  STRING,  "
    "Landlord_name  STRING, "
    "Address_line_1 STRING, "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp   "
    ") USING DELTA"
)

In [0]:
# Building Delta table (created only when it does not exist yet).
# One column per literal; the concatenated statement text is unchanged.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS building_delta ("
    "Landlord_id INT,  "
    "Building_name  STRING,  "
    "Address_line_1  STRING,  "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp "
    ") USING DELTA"
)

In [0]:
# Apartment Delta table (created only when it does not exist yet).
# One column per literal; the concatenated statement text is unchanged.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS apartment_delta ("
    "Apartment_number INT,  "
    "Type STRING,  "
    "Rent_fee  FLOAT,  "
    "Building_name  STRING, "
    "Appt_details   STRING, "
    "EventTimestamp timestamp "
    ") USING DELTA"
)

In [0]:
# Contractor Delta table (created only when it does not exist yet).
# One column per literal; the concatenated statement text is unchanged.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS contractor_delta ("
    "Contract_id INT,  "
    "Name  STRING, "
    "Address_line_1 STRING, "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp "
    ") USING DELTA"
)

In [0]:
# Tenant Delta table (created only when it does not exist yet).
# One column per literal; the concatenated statement text is unchanged.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS tenant_delta ("
    "Tenant_id INT,  "
    "First_name  STRING, "
    "Last_name STRING, "
    "Ssn  STRING, "
    "Phone STRING, "
    "Email  STRING, "
    "Mobile  STRING, "
    "EventTimestamp timestamp "
    ") USING DELTA"
)


In [0]:
# Apartment-maintenance Delta table (created only when it does not exist yet).
# NOTE(review): unlike the other Delta tables in this notebook, this table
# name carries no `_delta` suffix - confirm that is intentional.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS apartment_maintenance ("
    "Maintenance_id INT,  "
    "Apartment_number  INT,  "
    "Mdate  STRING,  "
    "Issue_reported  STRING, "
    "Contractor_id  INT, "
    "Resolution  STRING, "
    "Status  STRING, "
    "Charges_incurred  STRING,  "
    "EventTimestamp timestamp "
    ") USING DELTA"
)


In [0]:
# `*_data` tables: each one is partitioned by a Load_date STRING column and is
# created only when it does not exist yet. Column lists are written one column
# per literal; Python joins adjacent literals, so the SQL text is unchanged.

# Landlord
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS landlord_data ("
    "landlord_seq INT, "
    "Landlord_id INT,  "
    "Password  STRING,  "
    "Landlord_name  STRING, "
    "Address_line_1 STRING, "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp   "
    ")  PARTITIONED BY (Load_date STRING)"
)

# Building
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS building_data ("
    "building_seq INT, "
    "landlord_seq INT, "
    "Landlord_id INT, "
    "Building_name  STRING,  "
    "Address_line_1  STRING,  "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp "
    ") PARTITIONED BY (Load_date STRING)"
)

# Apartment
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS apartment_data ("
    "apartment_seq INT,  "
    "Apartment_number INT, "
    "Type STRING,  "
    "Rent_fee  FLOAT,  "
    "building_seq INT, "
    "Building_name  STRING, "
    "Appt_details   STRING, "
    "EventTimestamp timestamp "
    ") PARTITIONED BY (Load_date STRING)"
)

# Contractor
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS contractor_data ("
    "contract_seq INT, "
    "Contract_id INT,  "
    "Name  STRING, "
    "Address_line_1 STRING, "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp "
    ")PARTITIONED BY (Load_date STRING)"
)

# Tenant
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS tenant_data ("
    "tenant_seq INT, "
    "Tenant_id INT,  "
    "First_name  STRING, "
    "Last_name STRING, "
    "Ssn  STRING, "
    "Phone STRING, "
    "Email  STRING, "
    "Mobile  STRING, "
    "EventTimestamp timestamp "
    ") PARTITIONED BY (Load_date STRING)"
)


In [0]:
# landlord_data, building_data, apartment_data, contractor_data and tenant_data
# are created by the preceding cell. This cell used to re-declare them, four of
# them with a different (narrower) column list. Because every statement uses
# IF NOT EXISTS, those re-declarations were no-ops when the notebook runs top
# to bottom, and silently selected a different schema when cells were run out
# of order. They are removed so each table has exactly one definition.
# NOTE(review): if the narrower column lists were the intended schema, change
# the preceding cell rather than re-adding the duplicates here.

# Apartment-maintenance table, partitioned by Load_date; created only when it
# does not exist yet. This is the one table the preceding cell does not create.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS apartment_maintenance_data ("
    "Maintenance_id INT,  "
    "Apartment_number  INT,  "
    "Mdate  STRING,  "
    "Issue_reported  STRING, "
    "Contractor_id  INT, "
    "Resolution  STRING, "
    "Status  STRING, "
    "Charges_incurred  STRING,  "
    "EventTimestamp timestamp "
    ") PARTITIONED BY (Load_date STRING)"
)



In [0]:
import datetime

# Read the clock once so the date, the time tuple and the formatted string
# printed below all describe the same instant (separate today() calls could
# straddle a second or day boundary).
current_dt = datetime.datetime.today()

print(current_dt.date())
# Timestamp text in month-day-year order followed by 24-hour time.
dateStr = current_dt.strftime("%m-%d-%Y %H:%M:%S")
print(current_dt.timetuple())
print(dateStr)

In [0]:
# `*_i` tables: unpartitioned, no USING clause, each created only when it does
# not exist yet. Column lists are written one column per literal; Python joins
# adjacent literals, so the SQL text (including trailing blanks) is unchanged.

# Landlord
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS landlord_i("
    "landlord_seq INT, "
    "Landlord_id INT,  "
    "Password  STRING,  "
    "Landlord_name  STRING, "
    "Address_line_1 STRING, "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp   "
    ")   "
)

# Building
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS building_i ("
    "landlord_seq INT,  "
    "Building_name  STRING,  "
    "Address_line_1  STRING,  "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp "
    ")  "
)

# Apartment
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS apartment_i ("
    "apartment_seq INT,  "
    "Apartment_number INT,  "
    "Type STRING,  "
    "Rent_fee  FLOAT,  "
    "Building_name  STRING, "
    "Appt_details   STRING, "
    "EventTimestamp timestamp "
    ") "
)

# Contractor
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS contractor_i ("
    "contract_seq INT, "
    "Contract_id INT,  "
    "Name  STRING, "
    "Address_line_1 STRING, "
    "City  STRING, "
    "Post_code  STRING, "
    "Region  STRING, "
    "EventTimestamp timestamp "
    ") "
)

# Tenant
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS tenant_i ("
    "tenant_seq INT, "
    "Tenant_id INT,  "
    "First_name  STRING, "
    "Last_name STRING, "
    "Ssn  STRING, "
    "Phone STRING, "
    "Email  STRING, "
    "Mobile  STRING, "
    "EventTimestamp timestamp "
    ")  "
)

# Apartment maintenance
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS apartment_maintenance_i ("
    "maintenance_seq INT, "
    "Maintenance_id INT,  "
    "apartment_seq  INT,  "
    "Mdate  STRING,  "
    "Issue_reported  STRING, "
    "contract_seq  INT, "
    "Resolution  STRING, "
    "Status  STRING, "
    "Charges_incurred  STRING,  "
    "EventTimestamp timestamp "
    ") "
)



In [0]:
# Report table (created only when it does not exist yet): a date string, two
# integer values and a color string. One column per literal; the concatenated
# statement text is unchanged.
hivecontext.sql(
    "CREATE TABLE IF NOT EXISTS report ("
    "Date STRING, "
    "val1  INT, "
    "val2  INT,  "
    "color  STRING"
    ") "
)

