In [16]:
import sqlite3
from pathlib import Path

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# CONFIG
DB_PATH = '1_after_cleaning_databases/1000_calc_media.db'
TABLE_NAME = 'css_attribute'
OUTPUT_NAME = 'feature_matrix_media_calc'

## PANDAS
pd.set_option('future.no_silent_downcasting', True)
# Set pandas options to display the full DataFrame
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Disable line wrapping
pd.set_option('display.max_colwidth', None)  # Show full content in each column
pd.set_option('display.max_seq_item', 100000)  # Show all items in a sequence (e.g., lists, dictionaries)

# Optionally, increase the display buffer size
pd.set_option('display.max_info_columns', 100000)  # Show all columns in df.info()

# If you have very large dataframes, increasing this might help
pd.set_option('display.max_rows', 1000)  # You can change 1000 to a higher value if needed

### Functions

#### Loading data from sqlite to pandas dataframe

In [17]:
def load_sqlite_to_dataframe(db_path, table_name):
    """Read an entire SQLite table into a pandas DataFrame.

    Args:
        db_path: Path of the SQLite database file to open.
        table_name: Name of the table to read. It is interpolated into the
            query text (SQL identifiers cannot be bound as parameters), so it
            must come from trusted configuration, never from user input.

    Returns:
        A DataFrame holding every row and column of ``table_name``.

    Raises:
        Any error raised by ``sqlite3.connect`` or ``pd.read_sql``, e.g. when
        the table does not exist. The connection is closed in that case too.
    """
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql(f"SELECT * FROM {table_name}", conn)
    finally:
        # Close even when the query fails so a bad table name does not leak the connection.
        conn.close()

#### Changing boolean values to 1 and 0

In [18]:
def convert_booleans(value):
    """Translate the flag strings 'enabled' / 'disabled' into 1 / 0.

    The comparison ignores letter case. Every other input -- any other string
    and every non-string value -- is handed back unchanged.
    """
    # Non-strings can never be a flag, so pass them straight through.
    if not isinstance(value, str):
        return value

    flag_codes = {'enabled': 1, 'disabled': 0}
    return flag_codes.get(value.lower(), value)

#### Constructing feature matrix

In [19]:
def create_feature_matrix(df):
    """Pivot the long attribute table into one row per session.

    Args:
        df: Long-format DataFrame with the columns ``session_id``, ``source``,
            ``attribute`` and ``value``.

    Returns:
        A DataFrame with ``session_id``, one column per ``source:attribute``
        pair (first non-null value per session) and a ``fonts`` column holding
        the session's distinct font names, sorted and joined with ';'.
        Sessions without any font rows get a missing ``fonts`` value.
    """
    # Handle fonts: collapse all font rows of a session into one ';'-joined string.
    # Null font values are dropped first -- they can be neither sorted against
    # nor joined with strings, so a single null would otherwise raise TypeError.
    fonts = (
        df[df['attribute'] == 'font']
        .dropna(subset=['value'])
        .groupby('session_id')['value']
        .apply(lambda names: ';'.join(sorted(set(names))))
        .reset_index()
    )

    # Replace spaces in font names with underscores
    fonts['value'] = fonts['value'].str.replace(' ', '_', regex=False)

    # Rest of features: every non-font attribute becomes a "source:attribute" column
    non_fonts = df[df['attribute'] != 'font'].copy()
    non_fonts['feature_name'] = non_fonts['source'] + ':' + non_fonts['attribute']

    # Apply boolean conversion ('enabled'/'disabled' -> 1/0)
    non_fonts['value'] = non_fonts['value'].apply(convert_booleans)

    # Pivot table: one row per session, keeping the first value seen per feature
    pivot = non_fonts.pivot_table(index='session_id',
                                  columns='feature_name',
                                  values='value',
                                  aggfunc='first').reset_index()

    # Merge fonts back (left join keeps sessions that have no font rows)
    full_df = pivot.merge(fonts, on='session_id', how='left')

    # Return a renamed frame instead of mutating in place
    return full_df.rename(columns={'value': 'fonts'})

#### Changing font values to one-hot encoding

In [20]:
def vectorize_fonts(df, font_column='fonts'):
    """Expand the ';'-separated font list into one 0/1 indicator column per font.

    Args:
        df: Feature matrix containing ``font_column``; missing entries are
            treated as "no fonts".
        font_column: Name of the column holding the ';'-joined font names.

    Returns:
        A new DataFrame in which ``font_column`` is replaced by
        ``css:font_<name>`` indicator columns (names are lower-cased by
        CountVectorizer's default ``lowercase=True``) plus ``css:font_vector``,
        the row's indicators concatenated into a single string.

    Raises:
        ValueError: raised by scikit-learn when no row contains any font
            (empty vocabulary) -- NOTE(review): confirm against the installed version.
    """
    vectorizer = CountVectorizer(
        tokenizer=lambda text: [token.strip() for token in text.split(';') if token.strip()],
        # token_pattern is ignored once a tokenizer is supplied; None avoids sklearn's warning.
        token_pattern=None,
        # Presence flags, not counts: names differing only in case collapse to one
        # token after lower-casing and would otherwise yield values > 1, breaking
        # the one-digit-per-font layout of css:font_vector.
        binary=True,
    )
    font_features = vectorizer.fit_transform(df[font_column].fillna(''))

    font_df = pd.DataFrame(
        font_features.toarray(),
        columns=[f'css:font_{f}' for f in vectorizer.get_feature_names_out()],
        index=df.index
    )

    # Built before the column itself is added, so it covers the indicator columns only.
    font_df['css:font_vector'] = font_df.astype(str).agg(''.join, axis=1)

    df = df.drop(columns=[font_column]).join(font_df)
    return df

#### Cleaning feature types

In [21]:
def clean_feature_types(df):
    """Coerce measurement columns to numbers and fill missing values.

    Columns whose name starts with ``css:env``, ``css:media`` or ``js:env``,
    or contains ``width`` or ``height``, are converted with ``pd.to_numeric``;
    values that cannot be parsed become missing. Afterwards missing numeric
    values are filled with -1 and missing object values with ''.

    Args:
        df: Feature matrix. It is not modified; the function works on a copy.

    Returns:
        A new, cleaned DataFrame.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    numeric_prefixes = ('css:env', 'css:media', 'js:env')
    for col in df.columns:
        if not isinstance(col, str):
            continue  # name-based rule below only applies to string labels
        # TODO: revisit once more attributes are added -- this name-based rule is not very general.
        if col.startswith(numeric_prefixes) or 'width' in col or 'height' in col:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Fill missing numeric with -1
    numeric_cols = df.select_dtypes(include='number').columns
    df[numeric_cols] = df[numeric_cols].fillna(-1)

    # Fill missing non-numeric with empty string
    object_cols = df.select_dtypes(include='object').columns
    df[object_cols] = df[object_cols].fillna('')

    df = df.infer_objects()

    return df

#### Save dataframe to parquet and CSV

In [22]:
def save_features(df, output_dir='2_after_feature_extraction'):
    """Write the feature matrix to ``<output_dir>/<OUTPUT_NAME>.parquet`` and ``.csv``.

    Args:
        df: Feature matrix to persist.
        output_dir: Target directory; created (including parents) when missing.

    The CSV is written without the index column; the Parquet call uses pandas'
    defaults. The base file name comes from the module-level ``OUTPUT_NAME``.
    """
    out_dir = Path(output_dir)
    # Without this the writes fail on a fresh checkout where the folder does not exist yet.
    out_dir.mkdir(parents=True, exist_ok=True)

    df.to_parquet(out_dir / f'{OUTPUT_NAME}.parquet')
    df.to_csv(out_dir / f'{OUTPUT_NAME}.csv', index=False)

### Execution

In [23]:
# Load data: read the whole TABLE_NAME table from the SQLite file at DB_PATH
df_raw = load_sqlite_to_dataframe(DB_PATH, TABLE_NAME)

In [24]:
# Create feature matrix: one row per session_id, one column per "source:attribute",
# plus a 'fonts' column with the session's ';'-joined font names
df_features = create_feature_matrix(df_raw)

In [25]:
# Preview only the first rows; displaying the whole frame floods the notebook output
df_features.head(10)

Unnamed: 0,session_id,browserstack:browser,browserstack:browser_version,browserstack:os,browserstack:os_version,browserstack:real_height,browserstack:real_width,css:User-Agent,css:calc-10-width,css:calc-2-width,css:calc-3-width,css:calc-4-width,css:calc-5-width,css:calc-6-width,css:calc-7-width,css:calc-8-width,css:calc-9-width,css:env-1-height,css:env-1-width,css:env-10-height,css:env-10-width,css:env-11-height,css:env-11-width,css:env-12-height,css:env-12-width,css:env-13-height,css:env-13-width,css:env-14-height,css:env-14-width,css:env-2-height,css:env-2-width,css:env-3-height,css:env-3-width,css:env-5-height,css:env-5-width,css:env-6-height,css:env-6-width,css:env-7-height,css:env-7-width,css:env-8-height,css:env-8-width,css:env-9-height,css:env-9-width,css:image-set-heif,css:javascript,css:media-1-width,css:media-10-width,css:media-2-width,css:media-3-width,css:media-4-width,css:media-5-width,css:media-6-width,css:media-7-width,css:media-8-width,css:media-9-width,css:px_per_px,css:viewport_height,css:viewport_width,js:calc-1-container-height,js:calc-1-container-width,js:calc-10-container-height,js:calc-10-container-width,js:calc-2-container-height,js:calc-2-container-width,js:calc-3-container-height,js:calc-3-container-width,js:calc-4-container-height,js:calc-4-container-width,js:calc-5-container-height,js:calc-5-container-width,js:calc-6-container-height,js:calc-6-container-width,js:calc-7-container-height,js:calc-7-container-width,js:calc-8-container-height,js:calc-8-container-width,js:calc-9-container-height,js:calc-9-container-width,js:env-1-container-height,js:env-1-container-width,js:env-10-container-height,js:env-10-container-width,js:env-11-container-height,js:env-11-container-width,js:env-12-container-height,js:env-12-container-width,js:env-13-container-height,js:env-13-container-width,js:env-14-container-height,js:env-14-container-width,js:env-2-container-height,js:env-2-container-width,js:env-3-container-height,js:env-3-container-width,js:env-4-cont
ainer-height,js:env-4-container-width,js:env-5-container-height,js:env-5-container-width,js:env-6-container-height,js:env-6-container-width,js:env-7-container-height,js:env-7-container-width,js:env-8-container-height,js:env-8-container-width,js:env-9-container-height,js:env-9-container-width,js:media-1-container-height,js:media-1-container-width,js:media-10-container-height,js:media-10-container-width,js:media-2-container-height,js:media-2-container-width,js:media-3-container-height,js:media-3-container-width,js:media-4-container-height,js:media-4-container-width,js:media-5-container-height,js:media-5-container-width,js:media-6-container-height,js:media-6-container-width,js:media-7-container-height,js:media-7-container-width,js:media-8-container-height,js:media-8-container-width,js:media-9-container-height,js:media-9-container-width,fonts
0,0049b167-77a4-4a03-9b6b-bf83485e0fae,chrome,114.0,OS X,Mojave,1013,1200,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",0.239583,7.9375,279.102,6.496094,0.604167,938.685,19.6016,139.102,0.207031,136,195,166,711,175,1100,123,6763,125,84,130,78,188,403,110,133,100,600,164,182,123,554,78,587,115,678,0.0,1,32,3,17,128,32,68,20,1,8,5,1,8,12,200.0,1.765625,200.0,0.234375,200.0,7.921875,200.0,279.09375,200.0,6.484375,200.0,0.59375,200.0,938.671875,200.0,19.59375,200.0,139.09375,200.0,0.203125,136.0,195.75,166.0,711.0,175.671875,1184.0,123.0,6763.546875,125.0,84.234375,130.40625,78.0,188.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,182.0,123.1875,554.0,78.0,587.84375,115.015625,677.984375,0,40.0,0,3.0,0,17.0,0,1184.0,0,32,0,68,0,20.0,0,1.0,0,8,0,5.0,Menlo
1,005ffba4-b5d6-42d6-bea1-d678a7986805,edge,118.0,OS X,Sonoma,1011,1200,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46",0.239583,7.9375,279.102,6.496094,0.604167,938.685,19.6016,139.102,0.207031,136,195,166,712,175,1100,123,6762,125,84,130,78,188,403,110,133,100,600,164,183,123,554,78,587,115,681,0.0,1,32,7,17,128,32,68,20,1,8,5,1,8,12,200.0,1.765625,200.0,0.234375,200.0,7.921875,200.0,279.09375,200.0,6.484375,200.0,0.59375,200.0,938.671875,200.0,19.59375,200.0,139.09375,200.0,0.203125,136.0,195.9375,166.0,712.0,175.671875,1184.0,123.0,6762.40625,125.0,84.234375,130.40625,78.0,188.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,183.0,123.1875,554.0,78.0,587.9375,115.015625,680.984375,0,40.0,0,7.0,0,17.0,0,1184.0,0,32,0,68,0,20.0,0,1.0,0,8,0,5.0,Menlo
2,00753e05-2b50-47cf-a387-2aebb1e0645c,edge,113.0,OS X,Sequoia,1011,1200,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",0.239583,7.9375,279.102,6.496094,0.604167,938.685,19.6016,139.102,0.207031,136,195,166,711,175,1100,124,6708,125,84,130,78,188,403,110,133,100,600,164,182,123,554,78,587,115,678,0.0,1,32,3,17,128,32,68,20,1,8,5,1,8,12,200.0,1.765625,200.0,0.234375,200.0,7.921875,200.0,279.09375,200.0,6.484375,200.0,0.59375,200.0,938.671875,200.0,19.59375,200.0,139.09375,200.0,0.203125,136.0,195.75,166.0,711.0,175.671875,1184.0,124.0,6708.5625,125.0,84.234375,130.40625,78.0,188.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,182.0,123.1875,554.0,78.0,587.84375,115.015625,677.984375,0,40.0,0,3.0,0,17.0,0,1184.0,0,32,0,68,0,20.0,0,1.0,0,8,0,5.0,Menlo
3,007a487e-fe9b-4519-867f-bd043b7d3cc9,firefox,112.0,Windows,7,980,1296,Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0,50.0333,0.0,94.4833,3.18333,0.0,2107.983398,16.5,0.0,0.0,136,189,311,421,166,1100,127,7026,122,74,130,74,184,400,110,133,100,600,143,165,134,531,73,598,113,636,0.0,1,32,11,17,128,32,68,20,1,10,5,1,8,12,200.0,1.86666870117188,200.0,50.0333404541016,200.0,0.0,200.0,94.4833374023438,200.0,3.18333435058594,200.0,0.0,200.0,2107.9833984375,200.0,16.5,200.0,0.0,200.0,0.0,136.0,189.399993896484,311.0,421.0,166.0,1141.33337402344,127.5,7026.033203125,122.0,74.2333374023438,130.399993896484,74.0,184.766662597656,400.283325195313,110.0,133.766662597656,150.0,300.0,100.0,600.0,143.0,165.016662597656,134.16667175293,531.599975585938,73.0,598.799987792969,113.0,636.333312988281,0,40.0,0,11.0,0,17.0,0,1247.0,0,32,0,68,0,20.0,0,1.0,0,10,0,5.0,Arabic_Typesetting;Batang;Calibri;Franklin_Gothic;Levenim_MT;MS_Mincho;MS_UI_Gothic;Marlett;Meiryo_UI;Microsoft_Uighur;Segoe_UI_Light;SimHei;Vrinda
4,00977aea-b024-4b43-8f10-89b9323fe77a,chrome_android,unknown,android,10.0,727,393,"Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Mobile Safari/537.36",5.602941,7.953125,274.921875,6.496094,0.370536,938.622253,19.651787,59.558338,0.327381,130,224,169,785,167,1100,121,6866,126,85,131,76,163,454,105,150,100,377,163,169,51,487,66,600,115,671,0.0,1,32,4,49,128,32,68,41,2,8,5,3,7,3,200.0,12201610.0,200.0,5.60227298736572,200.0,7.9488639831543,200.0,274.920471191406,200.0,6.49431848526001,200.0,0.369318187236786,200.0,938.619323730469,200.0,19.6477279663086,200.0,59.5568199157715,200.0,0.323863655328751,130.0,224.727279663086,169.272735595703,785.238647460938,167.727279663086,1435.7216796875,121.454551696777,6866.818359375,126.181823730469,85.8977279663086,131.3125,76.7272720336914,163.0,454.653411865234,105.454551696777,150.329544067383,150.0,300.0,100.0,377.090911865234,163.272735595703,169.454544067383,51.8068199157715,487.5625,66.8181838989258,618.909118652344,115.454551696777,671.261413574219,0,48.0,0,4.0,0,49.0,0,377.090911865234,0,32,0,68,0,41.0,0,2.0,0,8,0,5.0,sans-serif-thin
5,00e5efaf-23b5-4884-9b6a-7bb5df20ffcc,firefox,123.0,OS X,Monterey,949,1280,Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0,0.016663,0.05,93.016663,2.98333,0.0,2107.983398,16.5,0.0,5.383331,124,196,309,470,164,1100,116,6973,124,74,130,74,180,400,104,133,100,600,156,164,126,531,56,600,111,737,0.0,1,32,7,17,128,32,64,20,1,14,5,1,8,12,200.0,0.0,200.0,0.01666259765625,200.0,0.0500030517578125,200.0,93.0166625976563,200.0,2.98333740234375,200.0,0.0,200.0,2107.9833984375,200.0,16.5,200.0,0.0,200.0,5.38333129882813,124.800003051758,196.066665649414,309.0,470.266662597656,164.0,1426.0,116.0,6973.783203125,124.0,74.2333374023438,130.399993896484,74.0,180.566665649414,400.283325195313,104.0,133.766662597656,150.0,300.0,100.0,600.0,156.0,164.683334350586,126.366668701172,531.599975585938,56.6000061035156,602.13330078125,111.0,737.700012207031,0,40.0,0,7.0,0,17.0,0,1264.0,0,32,0,64,0,20.0,0,1.0,0,14,0,5.0,Menlo
6,010c339b-09a6-43f5-aa80-bd9d71564bb2,chrome,106.0,Windows,7,1060,945,"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,136,190,166,732,175,1100,126,7000,129,81,130,78,189,403,110,133,100,600,164,170,126,550,78,585,115,657,,1,32,11,17,128,0,68,20,1,8,5,1,9,9,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,136.0,190.5,166.0,732.0,175.671875,1140.0,126.0,7000.84375,129.0,81.34375,130.40625,78.0,189.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,170.0,126.1875,550.671875,78.0,585.21875,115.015625,656.984375,0,40.0,0,11.0,0,17.0,0,904.0,0,0,0,68,0,20.0,0,1.0,0,8,0,5.0,Arabic_Typesetting;Batang;Calibri;Franklin_Gothic;Levenim_MT;MS_Mincho;MS_UI_Gothic;Marlett;Meiryo_UI;Microsoft_Uighur;Segoe_UI_Light;SimHei;Vrinda
7,019427d2-f7c6-4796-97a3-60a4f3058c1e,firefox,111.0,OS X,Ventura,949,1280,Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/111.0,50.0333,0.0,94.4833,3.383331,0.0,2107.983398,16.5,0.0,0.0,124,196,309,472,164,1100,120,7108,124,74,130,74,180,400,104,133,100,600,156,164,126,531,56,600,111,743,0.0,1,32,3,17,128,32,64,20,1,14,5,1,8,12,200.0,0.0,200.0,50.0333404541016,200.0,0.0,200.0,94.4833374023438,200.0,3.38333129882813,200.0,0.0,200.0,2107.9833984375,200.0,16.5,200.0,0.0,200.0,0.0,124.800003051758,196.066665649414,309.0,472.933349609375,164.0,1264.0,120.399993896484,7108.11669921875,124.0,74.2333374023438,130.399993896484,74.0,180.566665649414,400.283325195313,104.0,133.766662597656,150.0,300.0,100.0,600.0,156.0,164.683334350586,126.366668701172,531.599975585938,56.6000061035156,602.13330078125,111.0,743.733337402344,0,40.0,0,3.0,0,17.0,0,1264.0,0,32,0,64,0,20.0,0,1.0,0,14,0,5.0,Menlo
8,021e4bf3-a0b7-4ce9-8a52-5cf56594feb2,chrome_android,unknown,android,12.0,1037,753,"Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",5.602941,7.953125,274.921875,6.496094,0.370536,938.622253,19.651787,59.558338,0.327381,130,224,168,784,167,1100,121,6866,126,90,130,77,182,468,105,150,100,600,163,170,109,637,67,600,114,672,0.0,1,32,7,17,128,32,68,36,3,8,5,3,11,7,200.0,15790319.0,200.0,5.60294103622437,200.0,7.94852924346924,200.0,274.919128417969,200.0,6.49264717102051,200.0,0.367647051811218,200.0,938.61767578125,200.0,19.6470584869385,200.0,59.5514717102051,200.0,0.323529422283173,130.0,224.691177368164,168.0,784.683837890625,167.235290527344,1429.34558105469,121.882354736328,6866.81640625,126.117645263672,90.1102981567383,130.757354736328,77.4117660522461,182.308822631836,468.220581054688,105.882354736328,150.33088684082,150.0,300.0,100.0,600.0,163.058822631836,170.117645263672,109.338233947754,637.36767578125,67.5882339477539,645.941162109375,114.470588684082,672.698547363281,0,48.0,0,7.0,0,17.0,0,736.941162109375,0,32,0,68,0,36.0,0,3.0,0,8,0,5.0,sans-serif-thin
9,02bf78dc-6ebd-4cf5-b7f0-91ae2d131e6e,chrome,110.0,Windows,11,1012,945,"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,136,190,166,732,175,1100,126,6970,129,81,130,78,189,403,110,133,100,600,164,170,126,550,78,585,115,650,,1,32,3,17,128,0,68,20,1,8,5,1,9,9,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,200.0,0.0,136.0,190.5,166.0,732.0,175.671875,1140.0,126.0,6970.515625,129.0,81.34375,130.40625,78.0,189.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,170.0,126.1875,550.671875,78.0,585.21875,115.015625,649.984375,0,40.0,0,3.0,0,17.0,0,896.0,0,0,0,68,0,20.0,0,1.0,0,8,0,5.0,Calibri;Franklin_Gothic;MS_UI_Gothic;Marlett;Segoe_UI_Light


In [26]:
# Vectorize fonts: replace the 'fonts' column with css:font_* indicator columns.
# NOTE: this rebinds df_features, so the cell is not re-runnable on its own -- a second
# run fails because the 'fonts' column is already gone; re-run the feature-matrix cell first.
df_features = vectorize_fonts(df_features)



In [27]:
# Clean feature types: coerce measurement columns to numbers and fill missing
# values (-1 for numeric columns, '' for object columns)
df_ready = clean_feature_types(df_features)

In [28]:
# Sanity check: print the shape of the final matrix and preview its first rows
print(f"Shape of feature matrix: {df_ready.shape}")
display(df_ready.head())

Shape of feature matrix: (922, 142)


Unnamed: 0,session_id,browserstack:browser,browserstack:browser_version,browserstack:os,browserstack:os_version,browserstack:real_height,browserstack:real_width,css:User-Agent,css:calc-10-width,css:calc-2-width,css:calc-3-width,css:calc-4-width,css:calc-5-width,css:calc-6-width,css:calc-7-width,css:calc-8-width,css:calc-9-width,css:env-1-height,css:env-1-width,css:env-10-height,css:env-10-width,css:env-11-height,css:env-11-width,css:env-12-height,css:env-12-width,css:env-13-height,css:env-13-width,css:env-14-height,css:env-14-width,css:env-2-height,css:env-2-width,css:env-3-height,css:env-3-width,css:env-5-height,css:env-5-width,css:env-6-height,css:env-6-width,css:env-7-height,css:env-7-width,css:env-8-height,css:env-8-width,css:env-9-height,css:env-9-width,css:image-set-heif,css:javascript,css:media-1-width,css:media-10-width,css:media-2-width,css:media-3-width,css:media-4-width,css:media-5-width,css:media-6-width,css:media-7-width,css:media-8-width,css:media-9-width,css:px_per_px,css:viewport_height,css:viewport_width,js:calc-1-container-height,js:calc-1-container-width,js:calc-10-container-height,js:calc-10-container-width,js:calc-2-container-height,js:calc-2-container-width,js:calc-3-container-height,js:calc-3-container-width,js:calc-4-container-height,js:calc-4-container-width,js:calc-5-container-height,js:calc-5-container-width,js:calc-6-container-height,js:calc-6-container-width,js:calc-7-container-height,js:calc-7-container-width,js:calc-8-container-height,js:calc-8-container-width,js:calc-9-container-height,js:calc-9-container-width,js:env-1-container-height,js:env-1-container-width,js:env-10-container-height,js:env-10-container-width,js:env-11-container-height,js:env-11-container-width,js:env-12-container-height,js:env-12-container-width,js:env-13-container-height,js:env-13-container-width,js:env-14-container-height,js:env-14-container-width,js:env-2-container-height,js:env-2-container-width,js:env-3-container-height,js:env-3-container-width,js:env-4-cont
ainer-height,js:env-4-container-width,js:env-5-container-height,js:env-5-container-width,js:env-6-container-height,js:env-6-container-width,js:env-7-container-height,js:env-7-container-width,js:env-8-container-height,js:env-8-container-width,js:env-9-container-height,js:env-9-container-width,js:media-1-container-height,js:media-1-container-width,js:media-10-container-height,js:media-10-container-width,js:media-2-container-height,js:media-2-container-width,js:media-3-container-height,js:media-3-container-width,js:media-4-container-height,js:media-4-container-width,js:media-5-container-height,js:media-5-container-width,js:media-6-container-height,js:media-6-container-width,js:media-7-container-height,js:media-7-container-width,js:media-8-container-height,js:media-8-container-width,js:media-9-container-height,js:media-9-container-width,css:font_arabic_typesetting,css:font_batang,css:font_calibri,css:font_franklin_gothic,css:font_levenim_mt,css:font_marlett,css:font_meiryo_ui,css:font_menlo,css:font_microsoft_uighur,css:font_ms_mincho,css:font_ms_ui_gothic,css:font_sans-serif-thin,css:font_segoe_ui_light,css:font_simhei,css:font_vrinda,css:font_vector
0,0049b167-77a4-4a03-9b6b-bf83485e0fae,chrome,114.0,OS X,Mojave,1013,1200,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",0.239583,7.9375,279.102,6.496094,0.604167,938.685,19.6016,139.102,0.207031,136,195,166,711,175,1100,123,6763,125,84,130,78,188,403,110,133,100,600,164,182,123,554,78,587,115,678,0,1,32,3,17,128,32,68,20,1,8,5,1,8,12,200.0,1.765625,200.0,0.234375,200.0,7.921875,200.0,279.09375,200.0,6.484375,200.0,0.59375,200.0,938.671875,200.0,19.59375,200.0,139.09375,200.0,0.203125,136.0,195.75,166.0,711.0,175.671875,1184.0,123.0,6763.546875,125.0,84.234375,130.40625,78.0,188.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,182.0,123.1875,554.0,78.0,587.84375,115.015625,677.984375,0,40.0,0,3.0,0,17.0,0,1184.0,0,32,0,68,0,20.0,0,1.0,0,8,0,5.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,10000000
1,005ffba4-b5d6-42d6-bea1-d678a7986805,edge,118.0,OS X,Sonoma,1011,1200,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46",0.239583,7.9375,279.102,6.496094,0.604167,938.685,19.6016,139.102,0.207031,136,195,166,712,175,1100,123,6762,125,84,130,78,188,403,110,133,100,600,164,183,123,554,78,587,115,681,0,1,32,7,17,128,32,68,20,1,8,5,1,8,12,200.0,1.765625,200.0,0.234375,200.0,7.921875,200.0,279.09375,200.0,6.484375,200.0,0.59375,200.0,938.671875,200.0,19.59375,200.0,139.09375,200.0,0.203125,136.0,195.9375,166.0,712.0,175.671875,1184.0,123.0,6762.40625,125.0,84.234375,130.40625,78.0,188.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,183.0,123.1875,554.0,78.0,587.9375,115.015625,680.984375,0,40.0,0,7.0,0,17.0,0,1184.0,0,32,0,68,0,20.0,0,1.0,0,8,0,5.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,10000000
2,00753e05-2b50-47cf-a387-2aebb1e0645c,edge,113.0,OS X,Sequoia,1011,1200,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",0.239583,7.9375,279.102,6.496094,0.604167,938.685,19.6016,139.102,0.207031,136,195,166,711,175,1100,124,6708,125,84,130,78,188,403,110,133,100,600,164,182,123,554,78,587,115,678,0,1,32,3,17,128,32,68,20,1,8,5,1,8,12,200.0,1.765625,200.0,0.234375,200.0,7.921875,200.0,279.09375,200.0,6.484375,200.0,0.59375,200.0,938.671875,200.0,19.59375,200.0,139.09375,200.0,0.203125,136.0,195.75,166.0,711.0,175.671875,1184.0,124.0,6708.5625,125.0,84.234375,130.40625,78.0,188.0625,403.125,110.0,133.75,150.0,300.0,100.0,600.0,164.0,182.0,123.1875,554.0,78.0,587.84375,115.015625,677.984375,0,40.0,0,3.0,0,17.0,0,1184.0,0,32,0,68,0,20.0,0,1.0,0,8,0,5.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,10000000
3,007a487e-fe9b-4519-867f-bd043b7d3cc9,firefox,112.0,Windows,7,980,1296,Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0,50.0333,0.0,94.4833,3.18333,0.0,2107.983398,16.5,0.0,0.0,136,189,311,421,166,1100,127,7026,122,74,130,74,184,400,110,133,100,600,143,165,134,531,73,598,113,636,0,1,32,11,17,128,32,68,20,1,10,5,1,8,12,200.0,1.866669,200.0,50.03334,200.0,0.0,200.0,94.483337,200.0,3.183334,200.0,0.0,200.0,2107.983398,200.0,16.5,200.0,0.0,200.0,0.0,136.0,189.399994,311.0,421.0,166.0,1141.333374,127.5,7026.033203,122.0,74.233337,130.399994,74.0,184.766663,400.283325,110.0,133.766663,150.0,300.0,100.0,600.0,143.0,165.016663,134.166672,531.599976,73.0,598.799988,113.0,636.333313,0,40.0,0,11.0,0,17.0,0,1247.0,0,32,0,68,0,20.0,0,1.0,0,10,0,5.0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,111111101110111
4,00977aea-b024-4b43-8f10-89b9323fe77a,chrome_android,unknown,android,10.0,727,393,"Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Mobile Safari/537.36",5.602941,7.953125,274.921875,6.496094,0.370536,938.622253,19.651787,59.558338,0.327381,130,224,169,785,167,1100,121,6866,126,85,131,76,163,454,105,150,100,377,163,169,51,487,66,600,115,671,0,1,32,4,49,128,32,68,41,2,8,5,3,7,3,200.0,12201610.0,200.0,5.602273,200.0,7.948864,200.0,274.920471,200.0,6.494318,200.0,0.369318,200.0,938.619324,200.0,19.647728,200.0,59.55682,200.0,0.323864,130.0,224.72728,169.272736,785.238647,167.72728,1435.72168,121.454552,6866.818359,126.181824,85.897728,131.3125,76.727272,163.0,454.653412,105.454552,150.329544,150.0,300.0,100.0,377.090912,163.272736,169.454544,51.80682,487.5625,66.818184,618.909119,115.454552,671.261414,0,48.0,0,4.0,0,49.0,0,377.090912,0,32,0,68,0,41.0,0,2.0,0,8,0,5.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1000


In [29]:
# Count the columns whose name contains 'css' (the regex matches anywhere in the name)
df_ready.filter(regex='css').shape[1]

67

In [30]:
# Persist the cleaned feature matrix as Parquet and CSV (file names derive from OUTPUT_NAME)
save_features(df_ready)

In [31]:
# List all column names of the final feature matrix
print(df_ready.columns)

Index(['session_id', 'browserstack:browser', 'browserstack:browser_version',
       'browserstack:os', 'browserstack:os_version',
       'browserstack:real_height', 'browserstack:real_width', 'css:User-Agent',
       'css:calc-10-width', 'css:calc-2-width', 'css:calc-3-width',
       'css:calc-4-width', 'css:calc-5-width', 'css:calc-6-width',
       'css:calc-7-width', 'css:calc-8-width', 'css:calc-9-width',
       'css:env-1-height', 'css:env-1-width', 'css:env-10-height',
       'css:env-10-width', 'css:env-11-height', 'css:env-11-width',
       'css:env-12-height', 'css:env-12-width', 'css:env-13-height',
       'css:env-13-width', 'css:env-14-height', 'css:env-14-width',
       'css:env-2-height', 'css:env-2-width', 'css:env-3-height',
       'css:env-3-width', 'css:env-5-height', 'css:env-5-width',
       'css:env-6-height', 'css:env-6-width', 'css:env-7-height',
       'css:env-7-width', 'css:env-8-height', 'css:env-8-width',
       'css:env-9-height', 'css:env-9-width', 'css:ima