In [2]:
import pandas as pd
from pyspark.sql import SparkSession
from google.colab import drive

# Mount Google Drive at /content/drive (Colab prompts for authorization).
drive.mount('/content/drive')

# Path to the investments CSV.
# NOTE(review): this is a bare relative path, so it is resolved against the
# current working directory rather than the Drive mount above. If the file
# actually lives in Drive, point this at its location under '/content/drive'.
file_path = 'investments.csv'

# Initialize Spark Session
spark = SparkSession.builder.appName("sparksql").getOrCreate()

# Everything that uses the session runs inside try/finally so the session is
# stopped even when loading or one of the queries fails.
try:
    # Load the dataset; the first line is the header and column types are
    # inferred from the data.
    data = spark.read.csv(file_path, header=True, inferSchema=True)

    # Show data preview
    data.show()

    # Display column names and schema
    print(data.columns)
    data.printSchema()

    # Register the full dataset so it can be queried with Spark SQL
    data.createOrReplaceTempView('investment')

    print("First 10 rows:")
    spark.sql("SELECT * FROM investment LIMIT 10").show()

    print("Startups in the USA:")
    usastartups = spark.sql(
        "SELECT * FROM investment WHERE country_code = 'USA'"
    )
    usastartups.show()

    # Register the USA subset as its own view for the follow-up queries
    usastartups.createOrReplaceTempView('usastartup_table')

    # View data from the USA startup table
    spark.sql("SELECT * FROM usastartup_table LIMIT 10").show()

    # USA startups ordered by seed funding (largest first), keeping the
    # venture amount and first funding date alongside the name.
    most_seed = spark.sql("""
        SELECT name, seed, venture, first_funding_at
        FROM usastartup_table
        WHERE seed IS NOT NULL
        ORDER BY seed DESC
    """)
    most_seed.show(5)  # Top 5 for verification

    # first() returns None for an empty result instead of raising, so a
    # dataset with no matching rows is reported rather than crashing.
    top_row = most_seed.first()
    if top_row is None:
        print("No USA startups with seed funding found.")
    else:
        # Display the first row as dictionary
        most_seed_startup = top_row.asDict()
        print("Most Seeded USA Startup:")
        print(most_seed_startup)

        # .get() yields None rather than raising if a field is absent
        print("Venture Funding Amount:", most_seed_startup.get('venture'))
        print("First Funding Date:", most_seed_startup.get('first_funding_at'))
finally:
    # Stop Spark Session (always, including on failure)
    spark.stop()

Mounted at /content/drive
+--------------------+------------------+--------------------+--------------------+--------------------+-------------------+---------+------------+----------+--------------------+-----------------+--------------+----------+-------------------+---------------+------------+----------------+---------------+-------+-------+-------------------+-----------+----------------+--------------+-----+-----+--------------+---------------+-------------+----------------+--------------------+-------+-------+-------+-------+-------+-------+-------+-------+
|           permalink|              name|        homepage_url|       category_list|             market | funding_total_usd |   status|country_code|state_code|              region|             city|funding_rounds|founded_at|      founded_month|founded_quarter|founded_year|first_funding_at|last_funding_at|   seed|venture|equity_crowdfunding|undisclosed|convertible_note|debt_financing|angel|grant|private_equity|post_ipo_equity|p