### Explored:
- PySpark Dataframe
- Creating a Spark Session
- Reading the dataset
- Making Column Headers
- Used type(), head(), printSchema(), show(), describe(), and the inferSchema read option
- Selecting Columns
- Checking the datatypes of columns
- Adding Columns
- Dropping Columns
- Renaming Columns

In [44]:
import pyspark

In [45]:
# Preview the raw CSV with pandas before loading the same file through Spark.
import pandas as pd
pd.read_csv('test1.csv')

Unnamed: 0,Name,Age
0,Radhika,23
1,Shivam,24
2,Aashi,18


In [46]:
# A SparkSession is needed before any DataFrame work, so import it first
from pyspark.sql import SparkSession

In [47]:
# Build the session for this notebook under the application name "Practice";
# getOrCreate() hands back an already-running session if one exists.
spark = (
    SparkSession.builder
    .appName("Practice")
    .getOrCreate()
)

In [48]:
# Evaluate the session object as the cell's last expression so the notebook renders it.
spark

In [49]:
# Read the CSV through Spark with default options: no header handling and
# no schema inference, so the header line is loaded as an ordinary data row.
df_pyspark = spark.read.csv('test1.csv')

In [50]:
# The default read yields two string columns auto-named _c0 and _c1.
df_pyspark

DataFrame[_c0: string, _c1: string]

In [51]:
# show() prints the first rows of the DataFrame (20 by default), which covers
# this small file; note that "Name | Age" appears as a data row here.
df_pyspark.show()

+-------+---+
|    _c0|_c1|
+-------+---+
|   Name|Age|
|Radhika| 23|
| Shivam| 24|
|  Aashi| 18|
+-------+---+



In [52]:
# Goal: use the first row (Name, Age) as the column headers instead of _c0 and _c1

In [53]:
# Re-read the file with the 'header' option enabled so the first row
# supplies the column names (Name, Age) instead of being treated as data.
df_pyspark = (
    spark.read
    .option('header', 'true')
    .csv('test1.csv')
)

In [54]:
# Display the re-read DataFrame; Name and Age are now the column headers.
df_pyspark.show()

+-------+---+
|   Name|Age|
+-------+---+
|Radhika| 23|
| Shivam| 24|
|  Aashi| 18|
+-------+---+



In [55]:
# Inspect the Python type of the object returned by spark.read.
type(df_pyspark)

pyspark.sql.dataframe.DataFrame

In [56]:
# head(3) returns the first 3 rows as a list of Row objects
# (Age is still a string at this point because no schema was inferred).
df_pyspark.head(3)

[Row(Name='Radhika', Age='23'),
 Row(Name='Shivam', Age='24'),
 Row(Name='Aashi', Age='18')]

In [57]:
# printSchema() lists each column with its data type and nullability.
df_pyspark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Age: string (nullable = true)



In [58]:
# Both Name and Age are read as string because, without inferSchema, Spark treats every CSV column as a string.

In [59]:
# Re-read the file with the header option plus inferSchema, so Spark derives
# the column types from the data, then print the resulting schema.
df_pyspark = (
    spark.read
    .option('header', 'true')
    .csv('test1.csv', inferSchema=True)
)
df_pyspark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Age: integer (nullable = true)



In [60]:
# Same read as before, but with header and inferSchema both passed
# as keyword arguments to a single csv() call.
df_pyspark = spark.read.csv(
    'test1.csv',
    header=True,
    inferSchema=True,
)
df_pyspark.show()

+-------+---+
|   Name|Age|
+-------+---+
|Radhika| 23|
| Shivam| 24|
|  Aashi| 18|
+-------+---+



In [61]:
# Confirm the schema after the keyword-argument read: Age should be integer.
df_pyspark.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Age: integer (nullable = true)



In [62]:
# Check the type of the re-read object.
type(df_pyspark)

pyspark.sql.dataframe.DataFrame

In [63]:
# .columns gives the column names as a plain Python list of strings.
df_pyspark.columns

['Name', 'Age']

In [64]:
# select() returns a new DataFrame holding only the requested column;
# show() is then called on that result to display it.
df_pyspark.select('Name').show()

+-------+
|   Name|
+-------+
|Radhika|
| Shivam|
|  Aashi|
+-------+



In [65]:
# Project several columns at once by passing each column name
# as its own argument to select().
df_pyspark.select('Name', 'Age').show()

+-------+---+
|   Name|Age|
+-------+---+
|Radhika| 23|
| Shivam| 24|
|  Aashi| 18|
+-------+---+



In [66]:
# .dtypes gives a list of (column name, type name) pairs.
df_pyspark.dtypes

[('Name', 'string'), ('Age', 'int')]

In [67]:
# describe() builds a summary DataFrame (count, mean, stddev, min, max);
# mean and stddev are NULL for the string column Name.
df_pyspark.describe().show()

+-------+------+------------------+
|summary|  Name|               Age|
+-------+------+------------------+
|  count|     3|                 3|
|   mean|  NULL|21.666666666666668|
| stddev|  NULL|3.2145502536643185|
|    min| Aashi|                18|
|    max|Shivam|                24|
+-------+------+------------------+



In [68]:
# Derive a new column from Age (each value plus 2) and rebind
# df_pyspark to the DataFrame that includes it.
df_pyspark = df_pyspark.withColumn(
    'Age after 2 years',
    df_pyspark.Age + 2,
)

In [70]:
# Display the DataFrame to check the newly added column.
df_pyspark.show()

+-------+---+-----------------+
|   Name|Age|Age after 2 years|
+-------+---+-----------------+
|Radhika| 23|               25|
| Shivam| 24|               26|
|  Aashi| 18|               20|
+-------+---+-----------------+



In [73]:
# drop() returns a DataFrame without the named column; rebind df_pyspark to it
# so the 'Age after 2 years' column is gone from here on.
df_pyspark = df_pyspark.drop('Age after 2 years')

In [74]:
# Display the DataFrame to check that only Name and Age remain.
df_pyspark.show()

+-------+---+
|   Name|Age|
+-------+---+
|Radhika| 23|
| Shivam| 24|
|  Aashi| 18|
+-------+---+



In [75]:
# withColumnRenamed() returns a new DataFrame with 'Name' renamed to 'NewName'.
# NOTE: the result is only displayed, not assigned, so df_pyspark itself
# still has the column 'Name' after this cell.
df_pyspark.withColumnRenamed('Name', 'NewName').show()

+-------+---+
|NewName|Age|
+-------+---+
|Radhika| 23|
| Shivam| 24|
|  Aashi| 18|
+-------+---+

