# PySpark DataFrames
For each instruction, type the appropriate code into the cell below the instruction. Then, run the code by pressing the `Run` button above.

1. Import the SparkSession class:

```
from pyspark.sql import SparkSession
```

In [0]:
from pyspark.sql import SparkSession

In [0]:
# Get the active SparkSession, or create one if none exists yet.
# Later cells read their input files through this `spark` handle.
spark = (
    SparkSession.builder
    .appName(" Intro to dataframes")
    .getOrCreate()
)
    

In [0]:
# Evaluate the session as the cell's last expression so the notebook displays it.
spark

In [0]:
# Load the accounts CSV into a DataFrame named 'accounts',
# treating the first row of the file as the column names.
accounts = spark.read.csv('/FileStore/tables/accounts-2.csv', header=True)

In [0]:
# Print the column names and types of the accounts DataFrame.
# The recorded output shows every column typed as string.
accounts.printSchema()

root
 |-- account_number: string (nullable = true)
 |-- aba: string (nullable = true)
 |-- bic: string (nullable = true)
 |-- opened: string (nullable = true)
 |-- balance: string (nullable = true)



In [0]:
# Load the transactions Parquet file into a DataFrame.
# Parquet files carry their own schema, so the CSV-only 'header' option is not needed here.
# NOTE: the variable name is misspelled ('transcations') but is kept because later cells refer to it.
transcations = spark.read.parquet('/FileStore/tables/transactions.parquet')

In [0]:
# Print the schema of the transactions DataFrame, then evaluate the row count
# as the last expression so it becomes the cell's output.
transcations.printSchema() 
transcations.count()

root
 |-- account_number: string (nullable = true)
 |-- amount: long (nullable = true)
 |-- datetime: date (nullable = true)

Out[7]: 1000000

In [0]:
# Build a new DataFrame with one row per account_number holding the total of
# that account's transaction amounts.
# 'amount' is named explicitly so the result stays a single sum(amount) column
# even if other numeric columns are later added to the transactions data.
acc_trans = transcations.groupBy('account_number').sum('amount')
# Preview the first five aggregated rows as a list of Row objects.
acc_trans.head(5)

Out[8]: [Row(account_number='FBXK78425844480007', sum(amount)=-99434),
 Row(account_number='XJIU55438863095422', sum(amount)=77947),
 Row(account_number='XBYT37304125118047', sum(amount)=65101),
 Row(account_number='UQSE17000937342665', sum(amount)=118473),
 Row(account_number='KWOU43650129218895', sum(amount)=-35411)]

In [0]:
# Attach each account's summed transaction amount to its account record.
# An inner join keeps only account numbers present in both DataFrames.
with_sum = accounts.join(acc_trans, on='account_number', how='inner')
with_sum.show()

+------------------+---------+-----------+----------+-------+-----------+
|    account_number|      aba|        bic|    opened|balance|sum(amount)|
+------------------+---------+-----------+----------+-------+-----------+
|FBXK78425844480007|125056531|   TFJQGB5D|2014-05-07| 710027|     -99434|
|XJIU55438863095422|036174230|   KILTGB3K|2009-12-18| 339452|      77947|
|XBYT37304125118047|061333512|UVXHGB0SHJC|2013-12-21| 948840|      65101|
|UQSE17000937342665|028459374|JBPYGB7X207|2009-07-30| 928422|     118473|
|KWOU43650129218895|038163281|   PHLOGB1V|2016-06-14|1119190|     -35411|
|FNUS11547939884469|048504775|   KVXDGBQT|2001-06-23| 911119|     -61117|
|RWKR28406701224417|085114102|   INMGGB96|2013-10-13|1291803|      12973|
|NPLS84562938853238|125331256|   IHEAGB8O|2015-05-21| 933824|     -93579|
|FZHT89141815092168|018700127|NAIYGBBS2HS|2018-02-16|1926118|     -26762|
|KNLA77840498184902|093772264|   SFLUGBG7|2020-06-19|1291060|      46427|
|WLIZ87856094022198|050590353|   JAMLG