
# International Trade: Data Lab 1 — Intro to Python
**Instructor:** Carlos Góes — *George Washington University*  
**Term:** Fall 2025



## Why Python? (Easy to understand)


In [1]:

# Smallest possible program: store a greeting, then show it on screen
greeting = "Hi!"
print(greeting)


Hi!



## Integers, Floats, Strings, and Booleans
### Integers
Integers are whole numbers: $\mathbb{Z} = \{\dots, -2, -1, 0, 1, 2, \dots\}$.


In [2]:
var = 20                # a literal without a decimal point is an integer
var_type = type(var)    # type() reports the class of the stored value
print(var, var_type)

20 <class 'int'>



### Floats
Floats (floating-point numbers) approximate real numbers to finite precision: $$\mathbb{R} \supset \{ \dots, -\pi, -2.5, -\sqrt{2}, -\frac{1}{2}, 0, \frac{1}{2}, \sqrt{2}, 2.5, \pi, \dots \}$$

In [3]:
var = 3.14159265        # a decimal point makes the literal a float
var_type = type(var)    # type() reports the class of the stored value
print(var, var_type)

3.14159265 <class 'float'>



### Strings
Strings represent text.


In [4]:
var = "This is a string of characters"  # text goes between quotes
var_type = type(var)                    # type() reports the class of the stored value
print(var, var_type)

This is a string of characters <class 'str'>



### Booleans
Booleans represent logical truth values (`True` or `False`).


In [5]:
var1 = True          # a literal truth value
var2 = 2 + 2 == 5    # the result of a comparison; this one evaluates to False

# Show each value next to its type
for flag in (var1, var2):
    print(flag, type(flag))

True <class 'bool'>
False <class 'bool'>



### Variable types and theoretical categorizations
- Discrete quantitative: integers (e.g., completed years of schooling)  
- Continuous quantitative: floats (e.g., weight in lbs)  
- Categorical qualitative: booleans or strings (e.g., race)  
- Ordinal qualitative: strings or integers (ordered categories)



## Numeric operations


In [6]:
x = 7
y = 3

# Apply each arithmetic operator to the same pair of integers
print(f"Addition: {x + y}")
print(f"Subtraction: {x - y}")
print(f"Division: {x / y}")            # "/" returns a float even for two integers
print(f"Multiplication: {x * y}")
print(f"Exponentiation: {x ** y}")     # "**" raises x to the power y
print(f"Modulo (remainder): 3 % 2 = {3 % 2}")

Addition: 10
Subtraction: 4
Division: 2.3333333333333335
Multiplication: 21
Exponentiation: 343
Modulo (remainder): 3 % 2 = 1



## String operations


In [7]:
str1, str2 = "Carlos", "Góes"
full_name = str1 + " " + str2    # "+" joins strings end to end
print("Concatenation:", full_name)

str3 = "a"
repeated = str3 * 10             # "*" repeats a string the given number of times
print("Repetition:", repeated)

Concatenation: Carlos Góes
Repetition: aaaaaaaaaa


## Logical operations
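Use `==` and `!=` for value comparisons. Do not use `is` for this: `is` checks whether two names refer to the same object, not whether their values are equal.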

In [8]:
s = "a"

# "==" is True when both sides hold the same value
print(f"2 + 2 == 4: {(2 + 2 == 4)}")
print(f"s == 'a': {(s == 'a')}")

# "!=" is True when the two sides differ
print(f"2 + 2 != 5: {(2 + 2 != 5)}")
print(f"s != 'b': {(s != 'b')}")

# Ordering comparisons
print(f"10 > 100: {(10 > 100)}")
print(f"10 < 100: {(10 < 100)}")

2 + 2 == 4: True
s == 'a': True
2 + 2 != 5: True
s != 'b': True
10 > 100: False
10 < 100: True



### Logical arithmetic
`True` behaves like `1` and `False` behaves like `0` in arithmetic.


In [9]:
a = 1 + 1 == 2
b = 2 * 2 == 4
c = 2 * 2 == 5
print(f"a, b, c: {a} {b} {c}")

# In arithmetic True counts as 1 and False as 0, so:
d_sum = a + b + c     # the sum is the number of True values
d_prod = a * b * c    # the product is 1 only when every value is True
print(f"Sum (how many True?): {d_sum}")
print(f"Product (are all True?): {d_prod}")

a, b, c: True True False
Sum (how many True?): 2
Product (are all True?): 0


## Lists
Lists are ordered collections enclosed in square brackets `[]`.

In [10]:
# A single list may mix element types
list1 = [
    2,          # int
    20.5,       # float
    "Hi!",      # str
    10 < 100,   # bool (the comparison evaluates to True)
]
print(f"list1: {list1}")
print(f"Index 0: {list1[0]}")   # indexing starts at 0, so this is the first element
print(f"Index 3: {list1[3]}")   # the fourth element

list1: [2, 20.5, 'Hi!', True]
Index 0: 2
Index 3: True


In [11]:
# Lists are mutable: overwrite the first element in place
list1[0] = 20
print(f"After change: {list1}")

# append() adds one element to the end of the same list
list1.append(79.2)
print(f"After append: {list1}")

# Multiplying by an integer builds a new list with the elements repeated
list2 = 2 * list1
print(f"list2: {list2}")

# "+" joins two lists into a new one
list3, list4 = [1, 2], [5, 6]
list5 = list3 + list4
print(f"list5: {list5}")

After change: [20, 20.5, 'Hi!', True]
After append: [20, 20.5, 'Hi!', True, 79.2]
list2: [20, 20.5, 'Hi!', True, 79.2, 20, 20.5, 'Hi!', True, 79.2]
list5: [1, 2, 5, 6]



## Dictionaries
Dictionaries map keys to values using curly braces `{}`.


In [12]:

person1 = {"name": "Milton Friedman", "dob": "07/31/1912"}
print(person1)

# Look a value up by its key
person_name = person1["name"]
print("Name via key:", person_name)

# Assigning to a key adds it, or overwrites the value if the key already exists
person1["nationality"] = "USA"
print("After update:", person1)

{'name': 'Milton Friedman', 'dob': '07/31/1912'}
Name via key: Milton Friedman
After update: {'name': 'Milton Friedman', 'dob': '07/31/1912', 'nationality': 'USA'}



# Introduction to `pandas`
Install `pandas` once via `pip install pandas` (already available here). Then import it, together with `numpy`, which we use below to generate example numbers:


In [13]:
import numpy as np
import pandas as pd


## Series
Build a `Series` from other objects and access by label.


In [14]:
label = list("abcde")        # one label per value
x = np.linspace(1, 10, 5)    # five evenly spaced numbers from 1 to 10
series1 = pd.Series(data=x, index=label, name="Series1")

print(series1)

# Values are retrieved by their index label
for key in ("a", "d"):
    print(f"Access '{key}':", series1[key])

a     1.00
b     3.25
c     5.50
d     7.75
e    10.00
Name: Series1, dtype: float64
Access 'a': 1.0
Access 'd': 7.75



### Series from a dictionary
When built from a dictionary, indices come from keys.


In [15]:
# Student name -> ID string; the keys become the index of the Series below
gwuid = {
    "Carlos Goes": "06/99209",
    "Nicolas Powidayko": "10/22290",
    "Alexander Rabbat": "08/21346",
    "Dani Alaino": "07/20345",
    "Lya Nikate": "09/23567",
    "Niz Borroz": "11/22035",
    "Tom Rundal": "98/20145",
}

series2 = pd.Series(data=gwuid, name="GWUID")
print(series2)

Carlos Goes          06/99209
Nicolas Powidayko    10/22290
Alexander Rabbat     08/21346
Dani Alaino          07/20345
Lya Nikate           09/23567
Niz Borroz           11/22035
Tom Rundal           98/20145
Name: GWUID, dtype: object



## DataFrames
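DataFrames are collections of Series (aligned by index). Below we create two Series and assemble a DataFrame. Because we pass a list of Series, each Series becomes one row: the Series names become the row labels and the shared index becomes the column labels.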


In [16]:
label = list("abcde")
x = np.linspace(1, 10, 5)    # five evenly spaced numbers from 1 to 10
y = np.linspace(1, 20, 5)    # five evenly spaced numbers from 1 to 20
series1 = pd.Series(data=x, index=label, name="Series1")
series2 = pd.Series(data=y, index=label, name="Series2")

# A list of Series is stacked row by row: each Series name labels one row
df = pd.DataFrame([series1, series2])

print(df)  # print table

           a     b     c      d     e
Series1  1.0  3.25   5.5   7.75  10.0
Series2  1.0  5.75  10.5  15.25  20.0


In [17]:
# Pull out each view first, then print it under its own heading
column_a = df["a"]              # one column, selected by its label
row_series1 = df.loc["Series1"] # one row, selected by its label
df_transposed = df.T            # rows and columns swapped

print("Columns (labels):", df.columns)
print("\nColumn 'a':", column_a, sep="\n")
print("\nRow 'Series1':", row_series1, sep="\n")
print("\nTransposed df:", df_transposed, sep="\n")

Columns (labels): Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

Column 'a':
Series1    1.0
Series2    1.0
Name: a, dtype: float64

Row 'Series1':
a     1.00
b     3.25
c     5.50
d     7.75
e    10.00
Name: Series1, dtype: float64

Transposed df:
   Series1  Series2
a     1.00     1.00
b     3.25     5.75
c     5.50    10.50
d     7.75    15.25
e    10.00    20.00



### Access a specific element
You can select column-first (`df[column][row]`) or row-first (`df.loc[row, column]`). (Both examples below grab the same value.)


In [18]:
col_first = df["a"]["Series1"]        # pick the column, then the row within it
row_first = df.loc["Series1", "a"]    # give the row and column labels in one .loc lookup

print("df['a']['Series1']:", col_first)
print("df.loc['Series1','a']:", row_first)

df['a']['Series1']: 1.0
df.loc['Series1','a']: 1.0



### Example: Students DataFrame with multiple attributes
We use the earlier `gwuid` dict and add `major` and `gpa`. Then we explore selection and boolean masking.


In [19]:
# Turn the earlier `gwuid` dict (student name -> ID) into a Series indexed by name
gwuid_series = pd.Series(gwuid)

# Declared major of each student, indexed by the same names
major = pd.Series({
    'Carlos Goes': 'Economics',
    "Nicolas Powidayko": 'Economics',
    "Alexander Rabbat": 'Computer Science',
    "Dani Alaino": 'Computer Science',
    "Lya Nikate": 'Computer Science',
    "Niz Borroz": 'Statistics',
    "Tom Rundal": "Computer Science"
})

# Grade point average of each student, indexed by the same names
gpa = pd.Series({
    'Carlos Goes': 4.0,
    "Nicolas Powidayko": 3.8,
    "Alexander Rabbat": 3.8,
    "Dani Alaino": 3.4,
    "Lya Nikate": 3.3,
    "Niz Borroz": 3.0,
    "Tom Rundal": 3.0
})

# Build the table column by column: each dict key becomes a column name and the
# Series are aligned on their shared index (the student names).
# Passing the Series as columns lets each one keep its own dtype, so `gpa` stays
# numeric (float64). Stacking them as rows and transposing would instead turn
# every column into the generic `object` dtype.
df_students = pd.DataFrame({
    'gwuid': gwuid_series,
    'major': major,
    'gpa': gpa,
})

display(df_students) # display() has nicer formatting in Jupyter notebooks compared to print()

Unnamed: 0,gwuid,major,gpa
Carlos Goes,06/99209,Economics,4.0
Nicolas Powidayko,10/22290,Economics,3.8
Alexander Rabbat,08/21346,Computer Science,3.8
Dani Alaino,07/20345,Computer Science,3.4
Lya Nikate,09/23567,Computer Science,3.3
Niz Borroz,11/22035,Statistics,3.0
Tom Rundal,98/20145,Computer Science,3.0


In [20]:
# Row lookup: .loc with an index label returns that student's attributes
carlos_row = df_students.loc["Carlos Goes"]
print("Attributes of Carlos Goes:", carlos_row, sep="\n")

# Column lookup: square brackets with a column name return the whole column
gwuid_column = df_students["gwuid"]
print("\nAll registration numbers (gwuid):", gwuid_column, sep="\n")

# Comparing a column with a value gives one True/False per row
mask_cs = df_students["major"].eq("Computer Science")
print("\nBoolean mask for Computer Science majors:", mask_cs, sep="\n")

# Indexing with the mask keeps only the rows where it is True
cs_students = df_students.loc[mask_cs]
print("\nFiltered DataFrame (Computer Science majors):", cs_students, sep="\n")

Attributes of Carlos Goes:
gwuid     06/99209
major    Economics
gpa            4.0
Name: Carlos Goes, dtype: object

All registration numbers (gwuid):
Carlos Goes          06/99209
Nicolas Powidayko    10/22290
Alexander Rabbat     08/21346
Dani Alaino          07/20345
Lya Nikate           09/23567
Niz Borroz           11/22035
Tom Rundal           98/20145
Name: gwuid, dtype: object

Boolean mask for Computer Science majors:
Carlos Goes          False
Nicolas Powidayko    False
Alexander Rabbat      True
Dani Alaino           True
Lya Nikate            True
Niz Borroz           False
Tom Rundal            True
Name: major, dtype: bool

Filtered DataFrame (Computer Science majors):
                     gwuid             major  gpa
Alexander Rabbat  08/21346  Computer Science  3.8
Dani Alaino       07/20345  Computer Science  3.4
Lya Nikate        09/23567  Computer Science  3.3
Tom Rundal        98/20145  Computer Science  3.0
