In [1]:
from snowflake.snowpark import Session
from snowflake.core import Root
from snowflake.core.database import Database
from snowflake.core.schema import Schema

In [None]:
# Open a Snowpark session from the connection named "myconnection", then wrap it
# in a Root object, which is the handle the database cell below creates resources on.
session = (
    Session.builder
    .config("connection_name", "myconnection")
    .create()
)
root = Root(session)

In [None]:
## Create (or replace, if it already exists) the INVESTMENT_DB database on the Root
database = root.databases.create(
    Database(name="INVESTMENT_DB"),
    mode="orreplace",
)

In [None]:
## Create (or replace, if it already exists) INVESTMENT_SCHEMA inside the database above
schema = database.schemas.create(
    Schema(name="INVESTMENT_SCHEMA"),
    mode="orreplace",
)

In [None]:
import pandas as pd

In [None]:
# Read sp500_info.csv from the local csv_dumps directory into pandas and preview
# the first rows as the cell output.
sp500_info_df = pd.read_csv(
    "../csv_dumps/sp500_info.csv",
)
sp500_info_df.head()

In [None]:
# Read sp500_returns.csv from the local csv_dumps directory into pandas and preview
# the first rows as the cell output.
sp500_returns_df = pd.read_csv(
    "../csv_dumps/sp500_returns.csv",
)
sp500_returns_df.head()

In [None]:
# (Re)create the sp500_returns table; CREATE OR REPLACE discards any existing table
# of that name.
# NOTE(review): the table name is unqualified, so it is resolved against the
# session's current database/schema -- confirm that this is the intended
# INVESTMENT_DB.INVESTMENT_SCHEMA and not the connection's default location.
(
    session
    .sql('CREATE OR REPLACE TABLE sp500_returns (date DATE, symbol VARCHAR, adj_close FLOAT, close FLOAT, open FLOAT, high FLOAT, low FLOAT, volume FLOAT)')
    .collect()
)

In [None]:
# Row count of sp500_returns, shown as the cell output.
# NOTE(review): no visible cell loads rows into the freshly created table, so this
# presumably reports 0 -- confirm whether a load step is missing.
(
    session
    .sql('SELECT count(*) from sp500_returns')
    .collect()
)

In [None]:
## A DataFrame can be created from a table, view, or stream by name;
## here it is bound to the sp500_returns table and printed.
sp500_returns_dataframe = session.table(
    "sp500_returns"
)
sp500_returns_dataframe.show()

In [None]:
## A DataFrame can also be created from literal values;
## the resulting single column is then named "a".
df1 = (
    session
    .create_dataframe([1, 2, 3, 4])
    .to_df("a")
)
df1.show()

In [None]:
## Create a one-row DataFrame with 4 columns: "a", "b", "c" and "d"
## (column names are passed as a list via `schema`).
df2 = session.create_dataframe(
    [[1, 2, 3, 4]],
    schema=["a", "b", "c", "d"],
)
df2.show()

In [None]:
## Create another DataFrame with 4 columns ("a", "b", "c" and "d"),
## this time from a Row whose fields are given by keyword.
from snowflake.snowpark import Row
df3 = session.create_dataframe(
    [
        Row(a=1, b=2, c=3, d=4),
    ]
)
df3.show()

In [None]:
# Create a DataFrame with an explicit schema: column "a" as IntegerType and
# column "b" as StringType.
from snowflake.snowpark.types import IntegerType, StringType, StructType, StructField
# The StructType gets its own name on purpose: `schema` is already bound to the
# INVESTMENT_SCHEMA resource created earlier in this notebook, and rebinding it
# here would make the meaning of `schema` depend on which cells have been run.
df4_schema = StructType(
    [
        StructField("a", IntegerType()),
        StructField("b", StringType()),
    ]
)
df4 = session.create_dataframe(
    [[1, "snow"], [3, "flake"]],
    schema=df4_schema,
)
df4.show()

In [None]:
## Create a DataFrame from session.range(1, 10, 2) and name its column "a"
df_range = (
    session
    .range(1, 10, 2)
    .to_df("a")
)
df_range.show()

In [None]:
# Create DataFrames from data in a stage.
# JSON: the reader is pointed at a single staged file, with no schema supplied.
df_json = session.read.json("@my_stage2/data1.json")
# CSV: the reader is given an explicit two-field schema (name: string, age: integer)
# before being pointed at the staged path.
df_catalog = (
    session.read
    .schema(
        StructType(
            [
                StructField("name", StringType()),
                StructField("age", IntegerType()),
            ]
        )
    )
    .csv("@stage/some_dir")
)

In [None]:
## Create a DataFrame from the result of a SQL query and print it
df_sql = session.sql(
    'SELECT * from sp500_returns'
)
df_sql.show()

In [None]:
# Bind a DataFrame to the sp500_returns table by name (same table as the
# SQL query cell above reads) and print it.
df_sql_read = session.table(
    "sp500_returns"
)
df_sql_read.show()

Transformations

In [None]:
from snowflake.snowpark.functions import col