## Working with Numpy and Pandas

In [1]:
# we usually import the necessary stuff right at the top
import numpy as np
import pandas as pd

In [2]:
# we can load up any csv like this
lp = pd.read_csv('data/landing_pages.csv') # this might be a URL

In [4]:
lp.info() # we have a DataFame

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Document Type  3 non-null      object
 1   Landing Page   3 non-null      object
dtypes: object(2)
memory usage: 180.0+ bytes


### DataFrame (part of Pandas)
Rather like an Excel spreasheet, but rows are 0, 1, 2 
Each column may only contain a SINGLE data type (Dtypes)


In [6]:
lp.describe()

Unnamed: 0,Document Type,Landing Page
count,3,3
unique,1,3
top,Service Agreement,master subscription agreement
freq,3,1


In [8]:
# we may use slicing to access parts of the DataFrame
lp['Document Type']
lp['Landing Page']# a slice of one column

0     master subscription agreement
1    professional services contract
2          master service agreement
Name: Landing Page, dtype: object

In [10]:
# slicing lets us pick more than one column to show
lp[['Landing Page', 'Document Type']] # a slice showing more than one column MUST be in a list

Unnamed: 0,Landing Page,Document Type,Document Type.1
0,master subscription agreement,Service Agreement,Service Agreement
1,professional services contract,Service Agreement,Service Agreement
2,master service agreement,Service Agreement,Service Agreement


In [11]:
whichColumns = ['Landing Page', 'Document Type'] # we have a list
lp[whichColumns]

Unnamed: 0,Landing Page,Document Type
0,master subscription agreement,Service Agreement
1,professional services contract,Service Agreement
2,master service agreement,Service Agreement


### Challenge
* read the define_links.csv file into a DataFame
* see the .info()
* show just the slug column
* show the title and slug columns

In [12]:
defLinks = pd.read_csv('data/define_links.csv')

In [13]:
defLinks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3077 entries, 0 to 3076
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Slug    3077 non-null   object
 1   Title   3077 non-null   object
 2   URL     3077 non-null   object
dtypes: object(3)
memory usage: 72.2+ KB


In [15]:
defLinks['Slug'] # careful - case sensitive

0                       facts-in-issue
1                     offshore-project
2                      employers-share
3                     retail-liability
4                  debt-to-value-ratio
                     ...              
3072                          accounts
3073               academic-supervisor
3074              academic-publication
3075    academic-and-research-purposes
3076                       fiscal-year
Name: Slug, Length: 3077, dtype: object

In [16]:
cols = ['Title','Slug']
defLinks[cols]

Unnamed: 0,Title,Slug
0,facts in issue,facts-in-issue
1,offshore project,offshore-project
2,employers share,employers-share
3,retail liability,retail-liability
4,debt to value ratio,debt-to-value-ratio
...,...,...
3072,accounts,accounts
3073,academic supervisor,academic-supervisor
3074,academic publication,academic-publication
3075,academic and research purposes,academic-and-research-purposes


In [29]:
defLinks

Unnamed: 0,Slug,Title,URL
0,facts-in-issue,facts in issue,https://www.genieai.co/en-us/define/facts-in-i...
1,offshore-project,offshore project,https://www.genieai.co/en-us/define/offshore-p...
2,employers-share,employers share,https://www.genieai.co/en-us/define/employers-...
3,retail-liability,retail liability,https://www.genieai.co/en-us/define/retail-lia...
4,debt-to-value-ratio,debt to value ratio,https://www.genieai.co/en-us/define/debt-to-va...
...,...,...,...
3072,accounts,accounts,https://www.genieai.co/en-us/define/accounts
3073,academic-supervisor,academic supervisor,https://www.genieai.co/en-us/define/academic-s...
3074,academic-publication,academic publication,https://www.genieai.co/en-us/define/academic-p...
3075,academic-and-research-purposes,academic and research purposes,https://www.genieai.co/en-us/define/academic-a...


In [30]:
defLinks['URL'][2985] # slide a particular row number

'https://www.genieai.co/en-us/define/effective-termination-date'

In [31]:
defLinks.loc[2] # locate a row by its index name

Slug                                       employers-share
Title                                      employers share
URL      https://www.genieai.co/en-us/define/employers-...
Name: 2, dtype: object

In [32]:
defLinks.iloc[2] # locates a row by its underlying index number

Slug                                       employers-share
Title                                      employers share
URL      https://www.genieai.co/en-us/define/employers-...
Name: 2, dtype: object

In [34]:
# We may persuade large DataFrames to show more than just the first/last 5 members
defLinks.head(25)
defLinks.tail(12)

Unnamed: 0,Slug,Title,URL
3065,agreement-manager,agreement manager,https://www.genieai.co/en-us/define/agreement-...
3066,agreement,agreement,https://www.genieai.co/en-us/define/agreement
3067,affiliate,affiliate,https://www.genieai.co/en-us/define/affiliate
3068,adequate-procedures,adequate procedures,https://www.genieai.co/en-us/define/adequate-p...
3069,act-of-insolvency,act of insolvency,https://www.genieai.co/en-us/define/act-of-ins...
3070,act,act,https://www.genieai.co/en-us/define/act
3071,accounts-date,accounts date,https://www.genieai.co/en-us/define/accounts-date
3072,accounts,accounts,https://www.genieai.co/en-us/define/accounts
3073,academic-supervisor,academic supervisor,https://www.genieai.co/en-us/define/academic-s...
3074,academic-publication,academic publication,https://www.genieai.co/en-us/define/academic-p...


### Working with path

In [35]:
import sys

In [40]:
sys.version_info # info about the current python being used
sys.path.append('D:\\FrameworkFeb25\\.venv\\stuffInHere')

In [41]:
sys.path

['C:\\Users\\rb17\\AppData\\Local\\Programs\\Python\\Python313\\python313.zip',
 'C:\\Users\\rb17\\AppData\\Local\\Programs\\Python\\Python313\\DLLs',
 'C:\\Users\\rb17\\AppData\\Local\\Programs\\Python\\Python313\\Lib',
 'C:\\Users\\rb17\\AppData\\Local\\Programs\\Python\\Python313',
 'D:\\FrameworkFeb25\\.venv',
 '',
 'D:\\FrameworkFeb25\\.venv\\Lib\\site-packages',
 'D:\\FrameworkFeb25\\.venv\\Lib\\site-packages\\win32',
 'D:\\FrameworkFeb25\\.venv\\Lib\\site-packages\\win32\\lib',
 'D:\\FrameworkFeb25\\.venv\\Lib\\site-packages\\Pythonwin',
 'D:\\FrameworkFeb25\\.venv\\stuffInHere']