In [1]:
import pandas as pd
print(pd.__version__)

2.2.3


In [2]:
import pandas as pd

# Creating a Series: Boiling points (in °C) of halogens
boiling_points = pd.Series(
    [-188.1, -34.0, 59.5, 184.4, 336.8],  # Values
    index=["F", "Cl", "Br", "I", "At"],  # Element symbols as labels
    name="Boiling Point (°C)"
    )


# Display the Series
print(boiling_points)

# Example Operations
print("\nHighest Boiling Point:", boiling_points.max())  # Get the highest boiling point
print("\nSorted Boiling Points:\n", boiling_points.sort_values())  # Sort in ascending order
print("\nRanked Boiling Points:\n", boiling_points.rank())  # Rank the elements


F    -188.1
Cl    -34.0
Br     59.5
I     184.4
At    336.8
Name: Boiling Point (°C), dtype: float64

Highest Boiling Point: 336.8

Sorted Boiling Points:
 F    -188.1
Cl    -34.0
Br     59.5
I     184.4
At    336.8
Name: Boiling Point (°C), dtype: float64

Ranked Boiling Points:
 F     1.0
Cl    2.0
Br    3.0
I     4.0
At    5.0
Name: Boiling Point (°C), dtype: float64


In [3]:
import numpy as np
import pandas as pd
arr = np.array([1,2,3,4])
s=pd.Series(arr, index=(['H','He', 'Li', 'Be']))
print(s)

H     1
He    2
Li    3
Be    4
dtype: int64


In [4]:
# Creating a Series from a dictionary (keys become the index)
data = {"H": 1.008, "He": 4.0026, "Li": 6.94, "Be": 9.0122}
s = pd.Series(data, name="Atomic Mass (g/mol)")
print(s)

H     1.0080
He    4.0026
Li    6.9400
Be    9.0122
Name: Atomic Mass (g/mol), dtype: float64


In [5]:
s = pd.Series(1, index=["A", "B", "C", "D"])
print(s)

A    1
B    1
C    1
D    1
dtype: int64


In [6]:
import pandas as pd

# Creating a DataFrame
df = pd.DataFrame({"Element": ["H", "He", "Li"], "Atomic Mass": [1.008, 4.0026, 6.94]})

# Extracting a single column as a Series
atomic_mass_series = df["Atomic Mass"]
print(type(atomic_mass_series))  # <class 'pandas.core.series.Series'>
print(atomic_mass_series)

<class 'pandas.core.series.Series'>
0    1.0080
1    4.0026
2    6.9400
Name: Atomic Mass, dtype: float64


In [7]:
df["Atomic Mass Squared"] = df["Atomic Mass"].apply(lambda x: x ** 2)
print(type(df["Atomic Mass Squared"]))  # <class 'pandas.core.series.Series'>
print(df["Atomic Mass Squared"])

<class 'pandas.core.series.Series'>
0     1.016064
1    16.020807
2    48.163600
Name: Atomic Mass Squared, dtype: float64


In [8]:
import pandas as pd

# Step 1: Create a DataFrame of electronegativity values for halogens
halogens = pd.DataFrame({
    "Element": ["F", "Cl", "Br", "I", "At"],
    "Electronegativity": [3.98, 3.16, 2.96, 2.66, 2.2]
})

# Step 2: Define a function to classify elements based on electronegativity
def classify_electronegativity(value):
    if value >= 3.5:
        return "Highly Electronegative"
    elif value >= 2.5:
        return "Moderately Electronegative"
    else:
        return "Low Electronegativity"

# Step 3: Use `map()` to create a new Series with classifications
electronegativity_class = halogens["Electronegativity"].map(classify_electronegativity)

# Step 4: Add the new classification Series to the DataFrame
halogens["Electronegativity Category"] = electronegativity_class

# Step 5: Display the Series and DataFrame
print(electronegativity_class)
print(type(electronegativity_class))
print("\n")
print(halogens)
print(type(halogens))
# Step 6 (Optional): Save the DataFrame to a CSV file
halogens.to_csv("halogens_electronegativity.csv", index=False)


0        Highly Electronegative
1    Moderately Electronegative
2    Moderately Electronegative
3    Moderately Electronegative
4         Low Electronegativity
Name: Electronegativity, dtype: object
<class 'pandas.core.series.Series'>


  Element  Electronegativity  Electronegativity Category
0       F               3.98      Highly Electronegative
1      Cl               3.16  Moderately Electronegative
2      Br               2.96  Moderately Electronegative
3       I               2.66  Moderately Electronegative
4      At               2.20       Low Electronegativity
<class 'pandas.core.frame.DataFrame'>


In [9]:
import pandas as pd

# Create a sample DataFrame with chemical properties
df = pd.DataFrame({
    "Element": ["Na", "Mg", "Al", "Si", "P"],
    "Atomic Number": [11, 12, 13, 14, 15],
    "Electronegativity": [0.93, 1.31, 1.61, 1.90, 2.19]
})

# Extract the "Electronegativity" column using iloc
electronegativity_series = df.iloc[:, 2]  # Selecting column index 2

print(type(electronegativity_series))  # Output: <class 'pandas.core.series.Series'>
print(electronegativity_series)


<class 'pandas.core.series.Series'>
0    0.93
1    1.31
2    1.61
3    1.90
4    2.19
Name: Electronegativity, dtype: float64


In [10]:
# Extract "Electronegativity" column using loc
electronegativity_series = df.loc[:, "Electronegativity"]

print(type(electronegativity_series))  # Output: <class 'pandas.core.series.Series'>
print(electronegativity_series)


<class 'pandas.core.series.Series'>
0    0.93
1    1.31
2    1.61
3    1.90
4    2.19
Name: Electronegativity, dtype: float64


In [11]:
import pandas as pd
import numpy as np

# Creating a DataFrame with missing values
data = {
    "Element": ["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne",
                "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar"],
    "Atomic_Weight": [1.008, 4.0026, 6.94, 9.0122, 10.81, 12.011, 14.007, 
                      15.999, 18.998, 20.180, 22.990, None, 26.981, 28.085, 
                      30.974, None, 35.45, 39.948]
}

df = pd.DataFrame(data)

# Display the original DataFrame
print("Original DataFrame:")
print(df)

Original DataFrame:
   Element  Atomic_Weight
0        H         1.0080
1       He         4.0026
2       Li         6.9400
3       Be         9.0122
4        B        10.8100
5        C        12.0110
6        N        14.0070
7        O        15.9990
8        F        18.9980
9       Ne        20.1800
10      Na        22.9900
11      Mg            NaN
12      Al        26.9810
13      Si        28.0850
14       P        30.9740
15       S            NaN
16      Cl        35.4500
17      Ar        39.9480


In [12]:
df_cleaned = df.dropna()
print("\nDataFrame after dropping missing values:")
print(df_cleaned)


DataFrame after dropping missing values:
   Element  Atomic_Weight
0        H         1.0080
1       He         4.0026
2       Li         6.9400
3       Be         9.0122
4        B        10.8100
5        C        12.0110
6        N        14.0070
7        O        15.9990
8        F        18.9980
9       Ne        20.1800
10      Na        22.9900
12      Al        26.9810
13      Si        28.0850
14       P        30.9740
16      Cl        35.4500
17      Ar        39.9480


In [13]:
data = {
    "Element": ["H", "He", "Li", None, "B"],
    "State": ["Gas", "Gas", "Solid", None, "Solid"]
}

df1 = pd.DataFrame(data)
print(df1)
# Ensure missing values are explicitly set to NaN
df1 = df1.fillna(np.nan)

print(df1)

  Element  State
0       H    Gas
1      He    Gas
2      Li  Solid
3    None   None
4       B  Solid
  Element  State
0       H    Gas
1      He    Gas
2      Li  Solid
3     NaN    NaN
4       B  Solid


In [14]:
import numpy as np
import pandas as pd

# Create DataFrame
data = {
    "Element": ["H", "He", "Li", "Be", "B"],
    "Atomic_Weight": [1.008, 4.0026, "N/A", "?", 10.81]
}

df2 = pd.DataFrame(data)
print("Original DataFrame:")
print(df2)

# ✅ Fully Future-Proof Method
df2["Atomic_Weight"] = df2["Atomic_Weight"].astype("string")  # Force string type to avoid downcasting
df2["Atomic_Weight"] = df2["Atomic_Weight"].replace(["N/A", "?"], np.nan)  # Replace missing values
df2["Atomic_Weight"] = df2["Atomic_Weight"].astype(float)  # Convert to float after replacement

print("\nUpdated DataFrame:")
print(df2)





Original DataFrame:
  Element Atomic_Weight
0       H         1.008
1      He        4.0026
2      Li           N/A
3      Be             ?
4       B         10.81

Updated DataFrame:
  Element  Atomic_Weight
0       H         1.0080
1      He         4.0026
2      Li            NaN
3      Be            NaN
4       B        10.8100


In [15]:
df_mean_filled = df.copy()
df_mean_filled["Atomic_Weight"] = df_mean_filled["Atomic_Weight"].fillna(df["Atomic_Weight"].mean())

print("\nDataFrame with missing values replaced by the mean:")
print(df_mean_filled)



DataFrame with missing values replaced by the mean:
   Element  Atomic_Weight
0        H       1.008000
1       He       4.002600
2       Li       6.940000
3       Be       9.012200
4        B      10.810000
5        C      12.011000
6        N      14.007000
7        O      15.999000
8        F      18.998000
9       Ne      20.180000
10      Na      22.990000
11      Mg      18.587238
12      Al      26.981000
13      Si      28.085000
14       P      30.974000
15       S      18.587238
16      Cl      35.450000
17      Ar      39.948000


In [16]:
df_filled_custom = df.copy()

# ✅ Assign the modified column explicitly (instead of using inplace=True)
df_filled_custom["Atomic_Weight"] = df_filled_custom["Atomic_Weight"].fillna(0)

print("\nDataFrame with missing values replaced by 0:")
print(df_filled_custom)



DataFrame with missing values replaced by 0:
   Element  Atomic_Weight
0        H         1.0080
1       He         4.0026
2       Li         6.9400
3       Be         9.0122
4        B        10.8100
5        C        12.0110
6        N        14.0070
7        O        15.9990
8        F        18.9980
9       Ne        20.1800
10      Na        22.9900
11      Mg         0.0000
12      Al        26.9810
13      Si        28.0850
14       P        30.9740
15       S         0.0000
16      Cl        35.4500
17      Ar        39.9480


In [17]:
df_valid = df[df["Atomic_Weight"].notna()]
print("\nDataFrame with only valid atomic weights:")
print(df_valid)


DataFrame with only valid atomic weights:
   Element  Atomic_Weight
0        H         1.0080
1       He         4.0026
2       Li         6.9400
3       Be         9.0122
4        B        10.8100
5        C        12.0110
6        N        14.0070
7        O        15.9990
8        F        18.9980
9       Ne        20.1800
10      Na        22.9900
12      Al        26.9810
13      Si        28.0850
14       P        30.9740
16      Cl        35.4500
17      Ar        39.9480


In [18]:
# Creating a DataFrame with physical properties of alkali metals
alkali_metals = pd.DataFrame({
    "Atomic Number": [3, 11, 19, 37, 55],  # Lithium to Cesium
    "Atomic Radius (pm)": [152, 186, 227, 248, 265],
    "Density (g/cm³)": [0.534, 0.97, 0.86, 1.53, 1.87],
    "Melting Point (°C)": [180.5, 97.8, 63.5, 39.3, 28.5]
}, index=["Li", "Na", "K", "Rb", "Cs"])  # Using symbols as row index

# Display the DataFrame
print(alkali_metals)

# Example Operations
print("\nMean Atomic Radius:", alkali_metals["Atomic Radius (pm)"].mean())  # Average atomic radius
print("\nSorted by Melting Point:\n", alkali_metals.sort_values("Melting Point (°C)"))  # Sort by melting point
print("\nDensity Correlation:\n", alkali_metals.corr())  # Correlation between properties


    Atomic Number  Atomic Radius (pm)  Density (g/cm³)  Melting Point (°C)
Li              3                 152            0.534               180.5
Na             11                 186            0.970                97.8
K              19                 227            0.860                63.5
Rb             37                 248            1.530                39.3
Cs             55                 265            1.870                28.5

Mean Atomic Radius: 215.6

Sorted by Melting Point:
     Atomic Number  Atomic Radius (pm)  Density (g/cm³)  Melting Point (°C)
Cs             55                 265            1.870                28.5
Rb             37                 248            1.530                39.3
K              19                 227            0.860                63.5
Na             11                 186            0.970                97.8
Li              3                 152            0.534               180.5

Density Correlation:
                     Ato

In [19]:
# Convert melting points from °C to Kelvin (for both Series & DataFrame)
to_kelvin = lambda temp: temp + 273.15

# Apply function to the boiling points Series
boiling_points_K = boiling_points.apply(to_kelvin)
print("\nBoiling Points in Kelvin:\n", boiling_points_K)

# Apply function to the DataFrame column
#print("\n", alkali_metals)
alkali_metals["Melting Point (K)"] = alkali_metals["Melting Point (°C)"].apply(to_kelvin)
print("\nUpdated Alkali Metals DataFrame:\n", alkali_metals)



Boiling Points in Kelvin:
 F      85.05
Cl    239.15
Br    332.65
I     457.55
At    609.95
Name: Boiling Point (°C), dtype: float64

Updated Alkali Metals DataFrame:
     Atomic Number  Atomic Radius (pm)  Density (g/cm³)  Melting Point (°C)  \
Li              3                 152            0.534               180.5   
Na             11                 186            0.970                97.8   
K              19                 227            0.860                63.5   
Rb             37                 248            1.530                39.3   
Cs             55                 265            1.870                28.5   

    Melting Point (K)  
Li             453.65  
Na             370.95  
K              336.65  
Rb             312.45  
Cs             301.65  


In [20]:
# Use the directory structure from Workbook 4
import pandas as pd
import os
base_data_dir = os.path.expanduser("~/data")  # Parent directory
pubchem_data_dir = os.path.join(base_data_dir, "pubchem_data")  # Subdirectory for PubChem
os.makedirs(pubchem_data_dir, exist_ok=True)  # Ensure directories exist
periodictable_csv_datapath = os.path.join(pubchem_data_dir, "PubChemElements_all.csv")
df_periodictable = pd.read_csv(periodictable_csv_datapath)
df_periodictable.head()


Unnamed: 0,AtomicNumber,Symbol,Name,AtomicMass,CPKHexColor,ElectronConfiguration,Electronegativity,AtomicRadius,IonizationEnergy,ElectronAffinity,OxidationStates,StandardState,MeltingPoint,BoilingPoint,Density,GroupBlock,YearDiscovered
0,1,H,Hydrogen,1.008,FFFFFF,1s1,2.2,120.0,13.598,0.754,"+1, -1",Gas,13.81,20.28,9e-05,Nonmetal,1766
1,2,He,Helium,4.0026,D9FFFF,1s2,,140.0,24.587,,0,Gas,0.95,4.22,0.000179,Noble gas,1868
2,3,Li,Lithium,7.0,CC80FF,[He]2s1,0.98,182.0,5.392,0.618,+1,Solid,453.65,1615.0,0.534,Alkali metal,1817
3,4,Be,Beryllium,9.012183,C2FF00,[He]2s2,1.57,153.0,9.323,,+2,Solid,1560.0,2744.0,1.85,Alkaline earth metal,1798
4,5,B,Boron,10.81,FFB5B5,[He]2s2 2p1,2.04,192.0,8.298,0.277,+3,Solid,2348.0,4273.0,2.37,Metalloid,1808


In [21]:
df_periodictable=df_periodictable.set_index("Symbol")
df_periodictable.head()

Unnamed: 0_level_0,AtomicNumber,Name,AtomicMass,CPKHexColor,ElectronConfiguration,Electronegativity,AtomicRadius,IonizationEnergy,ElectronAffinity,OxidationStates,StandardState,MeltingPoint,BoilingPoint,Density,GroupBlock,YearDiscovered
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
H,1,Hydrogen,1.008,FFFFFF,1s1,2.2,120.0,13.598,0.754,"+1, -1",Gas,13.81,20.28,9e-05,Nonmetal,1766
He,2,Helium,4.0026,D9FFFF,1s2,,140.0,24.587,,0,Gas,0.95,4.22,0.000179,Noble gas,1868
Li,3,Lithium,7.0,CC80FF,[He]2s1,0.98,182.0,5.392,0.618,+1,Solid,453.65,1615.0,0.534,Alkali metal,1817
Be,4,Beryllium,9.012183,C2FF00,[He]2s2,1.57,153.0,9.323,,+2,Solid,1560.0,2744.0,1.85,Alkaline earth metal,1798
B,5,Boron,10.81,FFB5B5,[He]2s2 2p1,2.04,192.0,8.298,0.277,+3,Solid,2348.0,4273.0,2.37,Metalloid,1808


In [22]:
df_periodictable.tail()

Unnamed: 0_level_0,AtomicNumber,Name,AtomicMass,CPKHexColor,ElectronConfiguration,Electronegativity,AtomicRadius,IonizationEnergy,ElectronAffinity,OxidationStates,StandardState,MeltingPoint,BoilingPoint,Density,GroupBlock,YearDiscovered
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Fl,114,Flerovium,290.192,,[Rn]7s2 7p2 5f14 6d10 (predicted),,,,,"6, 4,2, 1, 0",Expected to be a Solid,,,,Post-transition metal,1998
Mc,115,Moscovium,290.196,,[Rn]7s2 7p3 5f14 6d10 (predicted),,,,,"3, 1",Expected to be a Solid,,,,Post-transition metal,2003
Lv,116,Livermorium,293.205,,[Rn]7s2 7p4 5f14 6d10 (predicted),,,,,"+4, +2, -2",Expected to be a Solid,,,,Post-transition metal,2000
Ts,117,Tennessine,294.211,,[Rn]7s2 7p5 5f14 6d10 (predicted),,,,,"+5, +3, +1, -1",Expected to be a Solid,,,,Halogen,2010
Og,118,Oganesson,295.216,,[Rn]7s2 7p6 5f14 6d10 (predicted),,,,,"+6, +4, +2, +1, 0, -1",Expected to be a Gas,,,,Noble gas,2006


In [23]:
print(df_periodictable.shape)
print(f'number rows is: {df_periodictable.shape[0]}, \nnumber of columns is {df_periodictable.shape[1]}')
print(df_periodictable.columns)
print(df_periodictable.dtypes)      

(118, 16)
number rows is: 118, 
number of columns is 16
Index(['AtomicNumber', 'Name', 'AtomicMass', 'CPKHexColor',
       'ElectronConfiguration', 'Electronegativity', 'AtomicRadius',
       'IonizationEnergy', 'ElectronAffinity', 'OxidationStates',
       'StandardState', 'MeltingPoint', 'BoilingPoint', 'Density',
       'GroupBlock', 'YearDiscovered'],
      dtype='object')
AtomicNumber               int64
Name                      object
AtomicMass               float64
CPKHexColor               object
ElectronConfiguration     object
Electronegativity        float64
AtomicRadius             float64
IonizationEnergy         float64
ElectronAffinity         float64
OxidationStates           object
StandardState             object
MeltingPoint             float64
BoilingPoint             float64
Density                  float64
GroupBlock                object
YearDiscovered            object
dtype: object


In [24]:
df_periodictable.info

<bound method DataFrame.info of         AtomicNumber         Name  AtomicMass CPKHexColor  \
Symbol                                                      
H                  1     Hydrogen    1.008000      FFFFFF   
He                 2       Helium    4.002600      D9FFFF   
Li                 3      Lithium    7.000000      CC80FF   
Be                 4    Beryllium    9.012183      C2FF00   
B                  5        Boron   10.810000      FFB5B5   
...              ...          ...         ...         ...   
Fl               114    Flerovium  290.192000         NaN   
Mc               115    Moscovium  290.196000         NaN   
Lv               116  Livermorium  293.205000         NaN   
Ts               117   Tennessine  294.211000         NaN   
Og               118    Oganesson  295.216000         NaN   

                    ElectronConfiguration  Electronegativity  AtomicRadius  \
Symbol                                                                       
H                 

In [25]:
df_periodictable.describe()

Unnamed: 0,AtomicNumber,AtomicMass,Electronegativity,AtomicRadius,IonizationEnergy,ElectronAffinity,MeltingPoint,BoilingPoint,Density
count,118.0,118.0,95.0,99.0,102.0,57.0,103.0,93.0,96.0
mean,59.5,146.540281,1.732316,209.464646,7.997255,1.07214,1273.740553,2536.212473,7.608001
std,34.207699,89.768356,0.635187,38.56913,3.339066,0.879163,888.853859,1588.410919,5.878692
min,1.0,1.008,0.7,120.0,3.894,0.079,0.95,4.22,9e-05
25%,30.25,66.48075,1.29,187.0,6.0205,0.47,516.04,1180.0,2.5725
50%,59.5,142.57383,1.62,209.0,6.96,0.754,1191.0,2792.0,7.072
75%,88.75,226.777165,2.17,232.0,8.9985,1.35,1806.5,3618.0,10.27525
max,118.0,295.216,3.98,348.0,24.587,3.617,3823.0,5869.0,22.57


In [26]:
#Check for missing data for each column
df_periodictable.isnull().sum()

AtomicNumber              0
Name                      0
AtomicMass                0
CPKHexColor              10
ElectronConfiguration     0
Electronegativity        23
AtomicRadius             19
IonizationEnergy         16
ElectronAffinity         61
OxidationStates           1
StandardState             0
MeltingPoint             15
BoilingPoint             25
Density                  22
GroupBlock                0
YearDiscovered            0
dtype: int64

In [27]:
df_periodictable.loc['Cr','ElectronConfiguration']

'[Ar]3d5 4s1'

In [28]:

# Define the list of halogens
halogens = ['F', 'Cl', 'Br', 'I', 'At']

# Select ionization energy for halogens
halogen_ionization = df_periodictable.loc[df_periodictable.index.isin(halogens), "IonizationEnergy"]

# Display results
#display(halogen_ionization.to_frame())  # Ensures it shows as a DataFrame
print(halogen_ionization)
print(type(halogen_ionization))

Symbol
F     17.423
Cl    12.968
Br    11.814
I     10.451
At     9.500
Name: IonizationEnergy, dtype: float64
<class 'pandas.core.series.Series'>


In [29]:
print(halogen_ionization)


Symbol
F     17.423
Cl    12.968
Br    11.814
I     10.451
At     9.500
Name: IonizationEnergy, dtype: float64


In [30]:
print(halogen_ionization.to_string())


Symbol
F     17.423
Cl    12.968
Br    11.814
I     10.451
At     9.500


In [31]:
print(halogen_ionization.to_frame())
print(type(halogen_ionization.to_frame()))

        IonizationEnergy
Symbol                  
F                 17.423
Cl                12.968
Br                11.814
I                 10.451
At                 9.500
<class 'pandas.core.frame.DataFrame'>


In [32]:
print(halogen_ionization.to_frame().to_string())
print(type(halogen_ionization.to_frame().to_string()))

        IonizationEnergy
Symbol                  
F                 17.423
Cl                12.968
Br                11.814
I                 10.451
At                 9.500
<class 'str'>


In [33]:
halogen_melt_boil_pts = df_periodictable.loc[df_periodictable.index.isin(halogens), ["MeltingPoint", "BoilingPoint"]]
print(halogen_melt_boil_pts)
print(type(halogen_melt_boil_pts))

        MeltingPoint  BoilingPoint
Symbol                            
F              53.53         85.03
Cl            171.65        239.11
Br            265.95        331.95
I             386.85        457.55
At            575.00           NaN
<class 'pandas.core.frame.DataFrame'>
