In [None]:
import kagglehub
# Fetch the dataset through kagglehub and print the location it reports.
DATASET_HANDLE = 'tfisthis/global-ai-tool-adoption-across-industries'
path = kagglehub.dataset_download(DATASET_HANDLE)
print(path)

/kaggle/input/global-ai-tool-adoption-across-industries


In [None]:
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the CSV from the directory that kagglehub.dataset_download() returned
# (`path`, set in the download cell) instead of repeating a hard-coded
# /kaggle/input location, so the load follows wherever the dataset was placed.
df = pd.read_csv(Path(path) / 'ai_adoption_dataset.csv')

In [None]:
# Total daily active users per AI tool, ranked from highest to lowest.
top_tools = (
    df.groupby('ai_tool', as_index=False)['daily_active_users']
    .sum()
    .sort_values('daily_active_users', ascending=False)
)

# One coloured bar per tool; the legend is hidden after the figure is built.
fig = px.bar(
    top_tools,
    x='ai_tool',
    y='daily_active_users',
    color='ai_tool',
    title='🔥 Most Used AI Tools (by Daily Active Users)',
    labels={'daily_active_users': 'Total Daily Active Users'},
).update_layout(showlegend=False)
fig.show()

In [None]:
# Mean adoption rate and record count for every (country, AI tool) pair.
grouped = df.groupby(['country', 'ai_tool']).agg(
    adoption_rate=('adoption_rate', 'mean'),
    count=('ai_tool', 'count')
).reset_index()

# Horizontal bars: x = mean adoption rate, y = country, one animation frame per AI tool.
# `count` is passed through hover_data so it is available as customdata[0] on hover.
fig = px.bar(
    grouped,
    x='adoption_rate',
    y='country',
    color='country',
    animation_frame='ai_tool',
    hover_data={'count': True, 'adoption_rate': ':.2f'},
    title='📊 Average Adoption Rate by Country per AI Tool',
    labels={'adoption_rate': 'Adoption Rate (%)'}
)
fig.update_layout(
    yaxis_title="Country",
    xaxis_title="Adoption Rate (%)",
    showlegend=False,
    # Replace the default animation controls with explicit Play / Pause buttons.
    updatemenus=[{
        "type": "buttons",
        "showactive": False,
        "buttons": [{
            "label": "Play",
            "method": "animate",
            "args": [None, {"frame": {"duration": 1000, "redraw": True}, "fromcurrent": True}]
        }, {
            "label": "Pause",
            "method": "animate",
            "args": [[None], {"mode": "immediate", "frame": {"duration": 0}, "transition": {"duration": 0}}]
        }]
    }],
    # NOTE(review): fixed, non-zero-based window; bars are truncated on the left
    # and any mean outside 48-51.7 would not be fully visible.
    xaxis=dict(range=[48, 51.7])
)

# The bars are horizontal, so the category (country) is on y and the rate is on x.
# The template must therefore bold %{y} and apply the numeric format to %{x}.
hover_template = '<b>%{y}</b><br>Adoption: %{x:.2f}%<br>Count: %{customdata[0]}'
fig.update_traces(hovertemplate=hover_template)
# update_traces only changes the traces in fig.data; each animation frame holds its
# own copy of the traces, so apply the same template there as well or the hover text
# reverts once a frame is applied.
for frame in fig.frames:
    for trace in frame.data:
        trace.hovertemplate = hover_template
fig.show()

In [None]:
# Mean adoption rate and record count for every (industry, AI tool) pair.
industry_grp = df.groupby(['industry', 'ai_tool']).agg(
    adoption_rate=('adoption_rate', 'mean'),
    count=('ai_tool', 'count')
).reset_index()

# Horizontal bars: x = mean adoption rate, y = industry, one animation frame per AI tool.
# `count` is passed through hover_data so it is available as customdata[0] on hover.
fig = px.bar(
    industry_grp,
    y='industry',
    x='adoption_rate',
    color='industry',
    animation_frame='ai_tool',
    hover_data={'count': True, 'adoption_rate': ':.2f'},
    title='🏭 AI Tool Adoption by Industry',
    labels={'adoption_rate': 'Adoption Rate (%)'}
)
# The x axis is numeric here, so no tick rotation is applied to it.
fig.update_layout(
    yaxis_title="Industry",
    xaxis_title="Adoption Rate (%)",
    showlegend=False,
    # Replace the default animation controls with explicit Play / Pause buttons.
    updatemenus=[{
        "type": "buttons",
        "showactive": False,
        "buttons": [{
            "label": "Play",
            "method": "animate",
            "args": [None, {"frame": {"duration": 1000, "redraw": True}, "fromcurrent": True}]
        }, {
            "label": "Pause",
            "method": "animate",
            "args": [[None], {"mode": "immediate", "frame": {"duration": 0}, "transition": {"duration": 0}}]
        }]
    }],
    # NOTE(review): fixed, non-zero-based window; bars are truncated on the left
    # and any mean outside 48-51 would not be fully visible.
    xaxis=dict(range=[48, 51])
)

# The bars are horizontal, so the category (industry) is on y and the rate is on x.
# The template must therefore bold %{y} and apply the numeric format to %{x}.
hover_template = '<b>%{y}</b><br>Adoption: %{x:.2f}%<br>Count: %{customdata[0]}'
fig.update_traces(hovertemplate=hover_template)
# update_traces only changes the traces in fig.data; each animation frame holds its
# own copy of the traces, so apply the same template there as well or the hover text
# reverts once a frame is applied.
for frame in fig.frames:
    for trace in frame.data:
        trace.hovertemplate = hover_template
fig.show()

In [None]:
# Mean adoption rate and record count for every (company size, AI tool) pair.
company_df = df.groupby(['company_size', 'ai_tool']).agg(
    adoption_rate=('adoption_rate', 'mean'),
    count=('ai_tool', 'count')
).reset_index()

# Horizontal bars: x = mean adoption rate, y = company size, one animation frame per AI tool.
# `count` is passed through hover_data so it is available as customdata[0] on hover.
fig = px.bar(
    company_df,
    y='company_size',
    x='adoption_rate',
    color='company_size',
    animation_frame='ai_tool',
    hover_data={'count': True, 'adoption_rate': ':.2f'},
    title='🏢 AI Tool Adoption by Company Size',
    labels={'adoption_rate': 'Adoption Rate (%)'}
)
fig.update_layout(
    yaxis_title="Company Size",
    xaxis_title="Adoption Rate (%)",
    # The categories live on the y axis in this horizontal chart, so the category
    # ordering is set there (it has no effect on the numeric x axis).
    yaxis_categoryorder='category ascending',
    showlegend=False,
    # Replace the default animation controls with explicit Play / Pause buttons.
    updatemenus=[{
        "type": "buttons",
        "showactive": False,
        "buttons": [{
            "label": "Play",
            "method": "animate",
            "args": [None, {"frame": {"duration": 1000, "redraw": True}, "fromcurrent": True}]
        }, {
            "label": "Pause",
            "method": "animate",
            "args": [[None], {"mode": "immediate", "frame": {"duration": 0}, "transition": {"duration": 0}}]
        }]
    }],
    # NOTE(review): fixed, non-zero-based window; bars are truncated on the left
    # and any mean outside 48-51 would not be fully visible.
    xaxis=dict(range=[48, 51])
)

# The bars are horizontal, so the category (company size) is on y and the rate is on x.
# The template must therefore bold %{y} and apply the numeric format to %{x}.
hover_template = '<b>%{y}</b><br>Adoption: %{x:.2f}%<br>Count: %{customdata[0]}'
fig.update_traces(hovertemplate=hover_template)
# update_traces only changes the traces in fig.data; each animation frame holds its
# own copy of the traces, so apply the same template there as well or the hover text
# reverts once a frame is applied.
for frame in fig.frames:
    for trace in frame.data:
        trace.hovertemplate = hover_template
fig.show()

In [None]:
# Mean adoption rate for every (year, AI tool) pair.
trend_df = df.groupby(['year', 'ai_tool'], as_index=False)['adoption_rate'].mean()
tools = trend_df['ai_tool'].unique()

# One line per tool; only the first tool's line starts out visible.
fig = go.Figure()
for position, tool in enumerate(tools):
    tool_rows = trend_df.loc[trend_df['ai_tool'] == tool]
    line = go.Scatter(
        x=tool_rows['year'],
        y=tool_rows['adoption_rate'],
        mode='lines+markers',
        name=tool,
        visible=(position == 0),
    )
    fig.add_trace(line)

# Each dropdown entry shows exactly one tool's line and retitles the figure to match.
dropdown_buttons = []
for tool in tools:
    visibility_mask = (tools == tool).tolist()
    dropdown_buttons.append(dict(
        method="update",
        label=tool,
        args=[{"visible": visibility_mask},
              {"title": f"📈 Adoption Trend for {tool} Over Time"}],
    ))

tool_selector = dict(
    buttons=dropdown_buttons,
    direction="down",
    x=1.05,
    y=1.1,
    showactive=True,
)
fig.update_layout(
    title=f"📈 Adoption Trend for {tools[0]} Over Time",
    xaxis_title="Year",
    yaxis_title="Adoption Rate (%)",
    xaxis=dict(dtick=1),  # one tick per year
    showlegend=False,
    updatemenus=[tool_selector],
)
fig.show()

In [None]:
# Summed adoption_rate per (year, tool), then each tool's share of its year's total.
pivot = df.groupby(['year', 'ai_tool'])['adoption_rate'].sum().reset_index()
pivot['adoption_share'] = pivot['adoption_rate'] / pivot.groupby('year')['adoption_rate'].transform('sum')

# The three tools with the largest 2023 share define both sides of the diagram.
top_tools_2023 = pivot[pivot['year'] == 2023].sort_values('adoption_share', ascending=False).head(3)['ai_tool'].tolist()
# NOTE(review): this rebinds `top_tools`, which the "Most Used AI Tools" cell uses for a
# DataFrame; the name is kept here only so the notebook's existing state is unchanged.
top_tools = top_tools_2023

# Sankey nodes: indices 0..k-1 are the 2023 tools, k..2k-1 the same tools in 2024.
labels = [f"{tool} 2023" for tool in top_tools] + [f"{tool} 2024" for tool in top_tools]
tool_indices_by_year = {
    2023: {tool: i for i, tool in enumerate(top_tools)},
    2024: {tool: i + len(top_tools) for i, tool in enumerate(top_tools)}
}

# Build the (year, tool) -> share lookup once instead of re-filtering `pivot`
# with boolean masks for every pair inside the nested loop.
share_lookup = {
    (year, tool): share
    for year, tool, share in zip(pivot['year'], pivot['ai_tool'], pivot['adoption_share'])
}

# NOTE(review): each link value is share_2023(from_tool) * share_2024(to_tool), i.e. a
# product of two per-year shares. This cell uses only year / ai_tool / adoption_rate and
# no per-user transitions, so the links are not observed switches between tools.
source, target, percent_values, custom_labels = [], [], [], []
for from_tool in top_tools:
    share_23 = share_lookup.get((2023, from_tool))
    for to_tool in top_tools:
        share_24 = share_lookup.get((2024, to_tool))
        # Skip pairs where either year has no row for the tool.
        if share_23 is None or share_24 is None:
            continue
        percent_values.append(round(share_23 * share_24 * 100, 2))  # Store as percentage
        source.append(tool_indices_by_year[2023][from_tool])
        target.append(tool_indices_by_year[2024][to_tool])
        custom_labels.append([from_tool, to_tool])

fig = go.Figure(go.Sankey(
    node=dict(label=labels, pad=15, thickness=20),
    link=dict(
        source=source,
        target=target,
        value=percent_values,  # Use percentage as flow thickness
        customdata=custom_labels,
        hovertemplate='<b>%{customdata[0]}</b> → <b>%{customdata[1]}</b><br>'
                      '📊 Flow: %{value:.2f}%<extra></extra>'
    )
))
fig.update_layout(title_text="🔁 Top 3 AI Tool Switching (2023 → 2024)", font_size=12)
fig.show()

In [None]:
# Report how many distinct values each categorical column holds.
columns_to_profile = ['country', 'industry', 'ai_tool', 'age_group', 'company_size']
for col in columns_to_profile:
    print(f"Unique values in {col}: {df[col].nunique()}")

# Work on a copy without user_feedback, show the per-column missing counts,
# then keep only the rows that have no missing values.
df_clean = df.drop('user_feedback', axis=1)
missing_per_column = df_clean.isna().sum()
print('Missing values in each column:\n', missing_per_column)
df_clean = df_clean.dropna()
print('Cleaned dataset shape:', df_clean.shape)

Unique values in country: 10
Unique values in industry: 8
Unique values in ai_tool: 5
Unique values in age_group: 5
Unique values in company_size: 3
Missing values in each column:
 country               0
industry              0
ai_tool               0
adoption_rate         0
daily_active_users    0
year                  0
age_group             0
company_size          0
dtype: int64
Cleaned dataset shape: (145000, 8)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, confusion_matrix

In [None]:
# Predict adoption_rate from the remaining columns; country is left out of the features.
target = 'adoption_rate'
categorical_cols = ['industry', 'ai_tool', 'age_group', 'company_size']

y = df_clean[target]
predictors = df_clean.drop(columns=[target, 'country'])

# One-hot encode the categorical columns, dropping the first level of each.
predictors_encoded = pd.get_dummies(predictors, columns=categorical_cols, drop_first=True)
print('Shape of features after encoding:', predictors_encoded.shape)

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    predictors_encoded,
    y,
    test_size=0.2,
    random_state=42,
)
print(f'Training set size: {X_train.shape[0]} samples')
print(f'Test set size: {X_test.shape[0]} samples')

# Fit a 100-tree random forest and score it on the held-out split.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)
# NOTE(review): the recorded run printed a negative R2, i.e. the predictions scored
# worse on the test split than a constant prediction of the mean would.
print('Model Evaluation Metrics:')
print(f'R2 Score: {r2:.4f}')
print(f'Mean Squared Error: {mse:.4f}')

Shape of features after encoding: (145000, 19)
Training set size: 116000 samples
Test set size: 29000 samples
Model Evaluation Metrics:
R2 Score: -0.1288
Mean Squared Error: 938.0222
